package puppeteerapiclient
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"context"
|
||
|
"crypto/md5"
|
||
|
"encoding/hex"
|
||
|
"encoding/json"
|
||
|
"fmt"
|
||
|
"io/ioutil"
|
||
|
"net/http"
|
||
|
)
|
||
|
|
||
|
// Client is a minimal HTTP client for the puppeteer scraping API.
type Client struct {
	// BaseURL is the root endpoint of the scraping service,
	// without a trailing slash (requests go to BaseURL + "/scrape").
	BaseURL string
	// Salt is the shared secret appended to the URL when computing
	// the request hash; see CalculateHash.
	Salt string
}
|
// ScrapeRequest is the JSON payload POSTed to the service's /scrape endpoint.
type ScrapeRequest struct {
	// URL is the page the service should fetch.
	URL string `json:"url"`
	// Selector chooses what to extract from the page
	// (presumably a CSS selector — confirm against the service docs).
	Selector string `json:"selector"`
	// Hash authenticates the request; it is the salted digest of URL
	// produced by Client.CalculateHash.
	Hash string `json:"hash"`
}
|
// ScrapeResponse is the result of a scrape: the request parameters echoed
// back together with the extracted content.
//
// NOTE(review): Scrape fills this struct locally from the raw response body;
// the json tags are not currently used for decoding.
type ScrapeResponse struct {
	// URL is the page that was scraped.
	URL string `json:"url"`
	// Selector is the selector that was applied.
	Selector string `json:"selector"`
	// Content is the raw body returned by the service (HTML, per Scrape).
	Content string `json:"content"`
}
|
func NewClient(baseURL, salt string) *Client {
|
||
|
return &Client{
|
||
|
BaseURL: baseURL,
|
||
|
Salt: salt,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (c *Client) CalculateHash(url string) string {
|
||
|
// The hash should be calculated by appending the salt to the URL with a colon separator.
|
||
|
data := url + ":" + c.Salt
|
||
|
hash := md5.Sum([]byte(data))
|
||
|
return hex.EncodeToString(hash[:])
|
||
|
}
|
||
|
|
||
|
func (c *Client) Scrape(ctx context.Context, url, selector string) (ScrapeResponse, error) {
|
||
|
if selector == "" {
|
||
|
return ScrapeResponse{}, fmt.Errorf("selector is required")
|
||
|
}
|
||
|
|
||
|
hash := c.CalculateHash(url)
|
||
|
|
||
|
requestBody, err := json.Marshal(ScrapeRequest{
|
||
|
URL: url,
|
||
|
Selector: selector,
|
||
|
Hash: hash,
|
||
|
})
|
||
|
if err != nil {
|
||
|
return ScrapeResponse{}, fmt.Errorf("failed to marshal request: %v", err)
|
||
|
}
|
||
|
|
||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/scrape", bytes.NewBuffer(requestBody))
|
||
|
if err != nil {
|
||
|
return ScrapeResponse{}, fmt.Errorf("failed to create request: %v", err)
|
||
|
}
|
||
|
req.Header.Set("Content-Type", "application/json")
|
||
|
|
||
|
client := &http.Client{}
|
||
|
resp, err := client.Do(req)
|
||
|
if err != nil {
|
||
|
return ScrapeResponse{}, fmt.Errorf("failed to send request: %v", err)
|
||
|
}
|
||
|
defer resp.Body.Close()
|
||
|
|
||
|
body, err := ioutil.ReadAll(resp.Body)
|
||
|
if err != nil {
|
||
|
return ScrapeResponse{}, fmt.Errorf("failed to read response body: %v", err)
|
||
|
}
|
||
|
|
||
|
// Since the response is HTML, we don't need to unmarshal JSON.
|
||
|
content := string(body)
|
||
|
|
||
|
return ScrapeResponse{
|
||
|
URL: url,
|
||
|
Selector: selector,
|
||
|
Content: content,
|
||
|
}, nil
|
||
|
}
|