package puppeteerapiclient import ( "bytes" "context" "crypto/md5" "encoding/hex" "encoding/json" "fmt" "io/ioutil" "math" "net/http" "time" ) const ( MaxRetries = 5 ) type Client struct { BaseURL string Salt string } type ScrapeRequest struct { URL string `json:"url"` Selector string `json:"selector"` Hash string `json:"hash"` } type ScrapeResponse struct { URL string `json:"url"` Selector string `json:"selector"` Content string `json:"content"` } func NewClient(baseURL, salt string) *Client { return &Client{ BaseURL: baseURL, Salt: salt, } } func (c *Client) CalculateHash(url string) string { data := url + ":" + c.Salt hash := md5.Sum([]byte(data)) return hex.EncodeToString(hash[:]) } func (c *Client) Scrape(ctx context.Context, url, selector string) (ScrapeResponse, error) { if selector == "" { return ScrapeResponse{}, fmt.Errorf("selector is required") } hash := c.CalculateHash(url) requestBody, err := json.Marshal(ScrapeRequest{ URL: url, Selector: selector, Hash: hash, }) if err != nil { return ScrapeResponse{}, fmt.Errorf("failed to marshal request: %v", err) } client := &http.Client{} var resp *http.Response var body []byte startTime := time.Now() for attempt := 0; attempt < MaxRetries; attempt++ { req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/scrape", bytes.NewBuffer(requestBody)) if err != nil { return ScrapeResponse{}, fmt.Errorf("failed to create request: %v", err) } req.Header.Set("Content-Type", "application/json") resp, err = client.Do(req) if err == nil && resp.StatusCode == http.StatusOK { defer resp.Body.Close() body, err = ioutil.ReadAll(resp.Body) if err != nil { return ScrapeResponse{}, fmt.Errorf("failed to read response body: %v", err) } content := string(body) return ScrapeResponse{ URL: url, Selector: selector, Content: content, }, nil } if resp != nil { resp.Body.Close() } select { case <-ctx.Done(): totalDuration := time.Since(startTime) return ScrapeResponse{}, fmt.Errorf("context cancelled after %d retries and %v: %v", attempt+1, totalDuration, ctx.Err()) case <-time.After(time.Duration(math.Pow(2, float64(attempt))) * time.Second): // continue to next retry } } totalDuration := time.Since(startTime) if err != nil { return ScrapeResponse{}, fmt.Errorf("failed to send request after %d retries and %v: %v", MaxRetries, totalDuration, err) } return ScrapeResponse{}, fmt.Errorf("received non-OK response after %d retries and %v: %s", MaxRetries, totalDuration, resp.Status) }