package alexatop import ( "archive/zip" "bytes" _ "embed" "encoding/csv" "fmt" "io" "math/rand" "strings" "sync" ) //go:embed backup/alexa.zip var alexaZip []byte var ( urls []string fetchError error once sync.Once ) // FetchAlexaTop1M fetches the Alexa Top 1M websites from the embedded zip file and returns a list of URLs func FetchAlexaTop1M() ([]string, error) { once.Do(func() { // Step 1: Open the zip file from the embedded data r, err := zip.NewReader(bytes.NewReader(alexaZip), int64(len(alexaZip))) if err != nil { fetchError = fmt.Errorf("failed to open zip file: %v", err) return } // Step 2: Locate and read the CSV file within the zip var csvFile *zip.File for _, f := range r.File { if strings.HasSuffix(f.Name, ".csv") { csvFile = f break } } if csvFile == nil { fetchError = fmt.Errorf("csv file not found in zip archive") return } rc, err := csvFile.Open() if err != nil { fetchError = fmt.Errorf("failed to open csv file: %v", err) return } defer rc.Close() // Step 3: Parse the CSV file and extract URLs csvReader := csv.NewReader(rc) var fetchedUrls []string for { record, err := csvReader.Read() if err == io.EOF { break } if err != nil { fetchError = fmt.Errorf("error reading csv: %v", err) return } if len(record) < 2 { continue } fetchedUrls = append(fetchedUrls, fmt.Sprintf("http://%s", record[1])) } urls = fetchedUrls }) return urls, fetchError } // RandomSite returns a random site from the list of Alexa Top 1M URLs func RandomSite() (string, error) { urls, err := FetchAlexaTop1M() if err != nil { return "", err } return urls[rand.Intn(len(urls))], nil } // NthSite returns the nth site from the list of Alexa Top 1M URLs func NthSite(n int) (string, error) { urls, err := FetchAlexaTop1M() if err != nil { return "", err } if n < 0 || n >= len(urls) { return "", fmt.Errorf("index out of range") } return urls[n], nil } // RandomSites returns n random sites from the list of Alexa Top 1M URLs func RandomSites(n int) ([]string, error) { urls, err := FetchAlexaTop1M() if err != nil { return nil, err } if n < 0 || n > len(urls) { return nil, fmt.Errorf("invalid number of sites requested") } selected := make([]string, n) for i := range selected { selected[i] = urls[rand.Intn(len(urls))] } return selected, nil }