puppeteerapiclient/client.go

88 lines
1.9 KiB
Go
Raw Normal View History

2024-06-02 18:32:29 +00:00
package puppeteerapiclient
import (
"bytes"
"context"
"crypto/md5"
"encoding/hex"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
// Client talks to a scraping service over HTTP.
type Client struct {
	// BaseURL is the service address; Scrape appends "/scrape" to it verbatim.
	BaseURL string
	// Salt is the value CalculateHash appends to a URL before hashing it.
	Salt string
}
// ScrapeRequest is the JSON payload that Scrape POSTs to the "/scrape"
// endpoint.
type ScrapeRequest struct {
	// URL is the address passed to Scrape.
	URL string `json:"url"`
	// Selector picks the part of the page to return (presumably a CSS
	// selector; confirm against the service). Scrape rejects an empty value.
	Selector string `json:"selector"`
	// Hash is the value CalculateHash produces for URL.
	Hash string `json:"hash"`
}
// ScrapeResponse is the result handed back by Scrape.
type ScrapeResponse struct {
	// URL echoes the url argument given to Scrape.
	URL string `json:"url"`
	// Selector echoes the selector argument given to Scrape.
	Selector string `json:"selector"`
	// Content holds the raw response body of the scrape call as a string.
	Content string `json:"content"`
}
// NewClient returns a Client whose BaseURL and Salt are set to the given
// values. Neither argument is validated or modified.
func NewClient(baseURL, salt string) *Client {
	client := new(Client)
	client.BaseURL = baseURL
	client.Salt = salt
	return client
}
// CalculateHash returns the lowercase hexadecimal MD5 digest of the string
// formed by url, a colon, and the client's Salt ("<url>:<salt>").
func (c *Client) CalculateHash(url string) string {
	digest := md5.New()
	// Writes to a hash.Hash never return an error, so the result is ignored.
	digest.Write([]byte(url + ":" + c.Salt))
	return hex.EncodeToString(digest.Sum(nil))
}
// Scrape asks the service at c.BaseURL+"/scrape" for the part of the page at
// url that matches selector.
//
// The request is a JSON-encoded ScrapeRequest sent with POST; its Hash field
// is c.CalculateHash(url). The response body is not decoded: it is returned
// verbatim in ScrapeResponse.Content, while URL and Selector echo the
// arguments.
//
// An error is returned when selector is empty, when the request cannot be
// built or sent (including cancellation of ctx), when the response body cannot
// be read, or when the service answers with a non-2xx status. Underlying
// errors are wrapped with %w so callers can inspect them with errors.Is and
// errors.As.
func (c *Client) Scrape(ctx context.Context, url, selector string) (ScrapeResponse, error) {
	if selector == "" {
		return ScrapeResponse{}, fmt.Errorf("selector is required")
	}

	requestBody, err := json.Marshal(ScrapeRequest{
		URL:      url,
		Selector: selector,
		Hash:     c.CalculateHash(url),
	})
	if err != nil {
		return ScrapeResponse{}, fmt.Errorf("failed to marshal request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/scrape", bytes.NewReader(requestBody))
	if err != nil {
		return ScrapeResponse{}, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	// Use the shared default client instead of building a new one per call.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return ScrapeResponse{}, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return ScrapeResponse{}, fmt.Errorf("failed to read response body: %w", err)
	}

	// A non-2xx answer carries an error message, not scraped content, so it
	// must not be handed back as a successful result.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		// Cap the echoed body so a large error page does not bloat the error.
		const maxErrBody = 256
		detail := bytes.TrimSpace(body)
		if len(detail) > maxErrBody {
			detail = detail[:maxErrBody]
		}
		return ScrapeResponse{}, fmt.Errorf("scrape request returned status %d: %q", resp.StatusCode, detail)
	}

	// The body is returned as-is; no JSON decoding is performed on it.
	return ScrapeResponse{
		URL:      url,
		Selector: selector,
		Content:  string(body),
	}, nil
}