alexatop/alexatop.go

120 lines
2.4 KiB
Go

package alexatop
import (
"archive/zip"
"bytes"
_ "embed"
"encoding/csv"
"fmt"
"io"
"math/rand"
"strings"
"sync"
)
// alexaZip holds the raw bytes of backup/alexa.zip, which the go:embed
// directive on the next line compiles into the binary.
//go:embed backup/alexa.zip
var alexaZip []byte
// Cached outcome of the one-time load performed by FetchAlexaTop1M.
var (
// urls is the list of URLs built from the embedded CSV; it is assigned only
// when the load completes without error.
urls []string
// fetchError is the error recorded when the load fails; it stays nil otherwise.
fetchError error
// once guards the load so that it runs a single time.
once sync.Once
)
// FetchAlexaTop1M returns the Alexa Top 1M sites as a list of URLs, each one
// built by prefixing "http://" to the second column of the CSV file stored in
// the embedded zip archive.
//
// The archive is parsed on the first call only; every later call returns the
// same cached slice and the same cached error. The returned slice is the
// package-level cache itself, so callers should treat it as read-only.
func FetchAlexaTop1M() ([]string, error) {
	once.Do(func() {
		urls, fetchError = parseAlexaZip(alexaZip)
	})
	return urls, fetchError
}

// parseAlexaZip reads the first entry whose name ends in ".csv" from the zip
// archive held in data and returns one "http://"-prefixed URL for every row
// that has at least two fields. On failure it returns a nil slice and an error
// that wraps the underlying cause.
func parseAlexaZip(data []byte) ([]string, error) {
	// Step 1: open the zip archive from the in-memory bytes.
	r, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return nil, fmt.Errorf("failed to open zip file: %w", err)
	}

	// Step 2: locate the CSV file within the archive.
	var csvFile *zip.File
	for _, f := range r.File {
		if strings.HasSuffix(f.Name, ".csv") {
			csvFile = f
			break
		}
	}
	if csvFile == nil {
		return nil, fmt.Errorf("csv file not found in zip archive")
	}

	rc, err := csvFile.Open()
	if err != nil {
		return nil, fmt.Errorf("failed to open csv file: %w", err)
	}
	defer rc.Close()

	// Step 3: parse the CSV and collect the URLs.
	csvReader := csv.NewReader(rc)
	// Accept rows of any width. With the default setting the reader rejects
	// every row whose field count differs from the first row, which would
	// make the short-row skip below unreachable.
	csvReader.FieldsPerRecord = -1
	// Only record[1] is kept, and it is copied by the concatenation below, so
	// the reader may safely reuse the record slice between calls.
	csvReader.ReuseRecord = true

	var fetched []string
	for {
		record, err := csvReader.Read()
		if err == io.EOF {
			return fetched, nil
		}
		if err != nil {
			return nil, fmt.Errorf("error reading csv: %w", err)
		}
		if len(record) < 2 {
			// No domain column on this row; skip it.
			continue
		}
		fetched = append(fetched, "http://"+record[1])
	}
}
// RandomSite returns one pseudo-randomly chosen URL from the list produced by
// FetchAlexaTop1M.
//
// It returns an error if the list cannot be loaded or if the list is empty.
func RandomSite() (string, error) {
	sites, err := FetchAlexaTop1M()
	if err != nil {
		return "", err
	}
	// rand.Intn panics when its argument is zero, so an empty list has to be
	// reported as an error rather than indexed.
	if len(sites) == 0 {
		return "", fmt.Errorf("no sites available")
	}
	return sites[rand.Intn(len(sites))], nil
}
// NthSite looks up the URL at zero-based position n in the list produced by
// FetchAlexaTop1M.
//
// It returns the load error if the list is unavailable, and an
// "index out of range" error when n is negative or not less than the list
// length.
func NthSite(n int) (string, error) {
	sites, err := FetchAlexaTop1M()
	switch {
	case err != nil:
		return "", err
	case n < 0, n >= len(sites):
		return "", fmt.Errorf("index out of range")
	}
	return sites[n], nil
}
// RandomSites returns n pseudo-randomly chosen URLs from the list produced by
// FetchAlexaTop1M.
//
// Each pick is drawn independently, so the same URL may appear more than once
// in the result. An error is returned if the list cannot be loaded, or if n is
// negative or larger than the number of available URLs.
func RandomSites(n int) ([]string, error) {
	pool, err := FetchAlexaTop1M()
	if err != nil {
		return nil, err
	}

	total := len(pool)
	if n < 0 || n > total {
		return nil, fmt.Errorf("invalid number of sites requested")
	}

	picks := make([]string, 0, n)
	for len(picks) < n {
		picks = append(picks, pool[rand.Intn(total)])
	}
	return picks, nil
}