// Package puppeteerapiclient implements a small HTTP client for a
// Puppeteer-based scraping service.
package puppeteerapiclient
import (
"bytes"
"context"
"crypto/md5"
"encoding/hex"
"encoding/json"
"fmt"
"io/ioutil"
"math"
"net/http"
"time"
)
const (
	// MaxRetries is the maximum number of attempts Scrape makes
	// against the API before giving up.
	MaxRetries = 5
)
// Client is a client for the Puppeteer scraping API.
type Client struct {
	// BaseURL is the root URL of the scraping service; Scrape appends
	// "/scrape" to it, so it should carry no trailing slash.
	BaseURL string
	// Salt is mixed into the MD5 request hash (see CalculateHash);
	// presumably it must match the salt configured server-side — confirm.
	Salt string
}
// ScrapeRequest is the JSON body POSTed to the /scrape endpoint.
type ScrapeRequest struct {
	// URL is the page the service should fetch.
	URL string `json:"url"`
	// Selector is the CSS selector to extract from the page.
	Selector string `json:"selector"`
	// Hash authenticates the request; it is the salted MD5 of URL
	// produced by Client.CalculateHash.
	Hash string `json:"hash"`
}
// ScrapeResponse is the result returned by Client.Scrape. Note that
// Scrape fills URL and Selector from its own arguments and Content from
// the raw response body; the JSON tags are not used for decoding there.
type ScrapeResponse struct {
	// URL is the page that was scraped.
	URL string `json:"url"`
	// Selector is the CSS selector that was requested.
	Selector string `json:"selector"`
	// Content is the raw body returned by the service.
	Content string `json:"content"`
}
// NewClient constructs a Client that talks to the scraping service at
// baseURL, using salt when computing request hashes.
func NewClient(baseURL, salt string) *Client {
	client := Client{BaseURL: baseURL, Salt: salt}
	return &client
}
// CalculateHash returns the hex-encoded MD5 digest of "<url>:<salt>",
// which the service uses to validate scrape requests.
// NOTE(review): MD5 is not collision-resistant; acceptable as a
// shared-secret checksum, but confirm it is not relied on for security.
func (c *Client) CalculateHash(url string) string {
	digest := md5.Sum([]byte(url + ":" + c.Salt))
	return hex.EncodeToString(digest[:])
}
// Scrape asks the Puppeteer API to fetch url and extract the content
// matched by selector. It POSTs a JSON ScrapeRequest to BaseURL+"/scrape"
// and retries up to MaxRetries times with exponential backoff (1s, 2s,
// 4s, ...) on transport errors or non-200 responses. ctx cancels both
// in-flight requests and backoff waits.
//
// It returns an error if selector is empty, if the request cannot be
// marshalled or built, or if every attempt fails.
func (c *Client) Scrape(ctx context.Context, url, selector string) (ScrapeResponse, error) {
	if selector == "" {
		return ScrapeResponse{}, fmt.Errorf("selector is required")
	}
	hash := c.CalculateHash(url)
	requestBody, err := json.Marshal(ScrapeRequest{
		URL:      url,
		Selector: selector,
		Hash:     hash,
	})
	if err != nil {
		return ScrapeResponse{}, fmt.Errorf("failed to marshal request: %v", err)
	}
	client := &http.Client{}
	// Track the outcome of the most recent attempt explicitly. The previous
	// version let the loop-scoped err shadow the function-scoped one, so the
	// post-loop error check always saw the stale nil marshal error and then
	// dereferenced resp — a nil *http.Response (and a panic) whenever every
	// client.Do call failed.
	var lastErr error
	var lastStatus string
	startTime := time.Now()
	for attempt := 0; attempt < MaxRetries; attempt++ {
		// Build a fresh request each attempt: the body reader is consumed
		// by client.Do and cannot be replayed.
		req, reqErr := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/scrape", bytes.NewBuffer(requestBody))
		if reqErr != nil {
			return ScrapeResponse{}, fmt.Errorf("failed to create request: %v", reqErr)
		}
		req.Header.Set("Content-Type", "application/json")
		resp, doErr := client.Do(req)
		switch {
		case doErr != nil:
			lastErr = doErr
		case resp.StatusCode != http.StatusOK:
			lastErr = nil
			lastStatus = resp.Status
			resp.Body.Close()
		default:
			body, readErr := ioutil.ReadAll(resp.Body)
			// Close promptly instead of deferring inside the loop.
			resp.Body.Close()
			if readErr != nil {
				return ScrapeResponse{}, fmt.Errorf("failed to read response body: %v", readErr)
			}
			return ScrapeResponse{
				URL:      url,
				Selector: selector,
				Content:  string(body),
			}, nil
		}
		if attempt == MaxRetries-1 {
			// Last attempt already failed; returning now avoids a pointless
			// final backoff sleep (16s) before reporting the error.
			break
		}
		select {
		case <-ctx.Done():
			totalDuration := time.Since(startTime)
			return ScrapeResponse{}, fmt.Errorf("context cancelled after %d retries and %v: %v", attempt+1, totalDuration, ctx.Err())
		case <-time.After(time.Duration(math.Pow(2, float64(attempt))) * time.Second):
			// exponential backoff before the next attempt
		}
	}
	totalDuration := time.Since(startTime)
	if lastErr != nil {
		return ScrapeResponse{}, fmt.Errorf("failed to send request after %d retries and %v: %v", MaxRetries, totalDuration, lastErr)
	}
	return ScrapeResponse{}, fmt.Errorf("received non-OK response after %d retries and %v: %s", MaxRetries, totalDuration, lastStatus)
}