This commit is contained in:
Jeffrey Paul 2024-06-02 14:31:12 -07:00
commit de3fdccaba
8 changed files with 261 additions and 0 deletions

1
.dockerignore Normal file
View File

@ -0,0 +1 @@
example

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
example

37
Dockerfile Normal file
View File

@ -0,0 +1,37 @@
# Stage 1 (lint): run golangci-lint over the whole source tree.
FROM golangci/golangci-lint:latest AS lint
# All work happens in /app inside the container.
WORKDIR /app
# Copy the entire build context (go.mod included) into the stage.
COPY . .
# A non-zero exit from the linter fails the build at this step.
RUN golangci-lint run

# Stage 2 (test): run the Go test suite with the official Go toolchain image.
FROM golang:1.22 AS test
WORKDIR /app
COPY . .
# A failing test fails the build at this step.
RUN go test -v ./...

# Stage 3 (final): tie the lint and test stages together.
FROM golang:1.22 AS final
WORKDIR /app
# Copying /app out of both earlier stages makes this stage depend on them, so
# a lint or test failure fails the image build.
COPY --from=lint /app .
COPY --from=test /app .
# The image exists only to prove lint and tests passed during the build, so the
# command is deliberately minimal.
CMD ["echo", "Build and tests passed successfully!"]

22
Makefile Normal file
View File

@ -0,0 +1,22 @@
# Targets that run commands rather than produce a file of the same name.
# `example` is intentionally absent: it is a real build artifact.
.PHONY: all run test clean docker lint

# Default target: build the example binary and run it.
all: run

# Build the example binary; rebuilt when any listed Go source changes.
example: *.go ./cmd/example/*.go
	go build -o $@ ./cmd/example/main.go

# Run the example binary, building it first if needed.
run: example
	./example

# Run the test suite for every package in the module.
test:
	go test -v ./...

# Remove the built example binary.
clean:
	rm -f example

# Build the Docker image with plain progress output.
docker:
	docker build --progress plain .

# Run golangci-lint on the source tree.
lint:
	golangci-lint run

119
alexatop.go Normal file
View File

@ -0,0 +1,119 @@
// Package alexatop exposes the Alexa Top 1M site list, which is embedded in
// the package as a zip archive, as a slice of URLs.
package alexatop
import (
"archive/zip"
"bytes"
_ "embed"
"encoding/csv"
"fmt"
"io"
"math/rand"
"strings"
"sync"
)
// alexaZip holds the raw bytes of backup/alexa.zip, embedded at build time.
//go:embed backup/alexa.zip
var alexaZip []byte
// Package-level cache written inside FetchAlexaTop1M's once.Do callback.
var (
// urls is the parsed URL list; it is only assigned after a successful parse.
urls []string
// fetchError is the error recorded when opening or parsing the archive fails.
fetchError error
// once ensures the embedded archive is parsed at most one time.
once sync.Once
)
// FetchAlexaTop1M returns the Alexa Top 1M site list as "http://"-prefixed
// URLs, parsed from the zip archive embedded in the package.
//
// The archive is parsed at most once: the first call does the work and every
// later call returns the same cached slice and error. The returned slice is
// the cached one, not a copy, so callers must treat it as read-only.
func FetchAlexaTop1M() ([]string, error) {
	once.Do(func() {
		urls, fetchError = parseAlexaZip(alexaZip)
	})
	return urls, fetchError
}

// parseAlexaZip reads the first ".csv" entry of the in-memory zip archive data
// and returns one "http://<field 2>" URL per CSV record.
//
// It returns a nil slice and a non-nil error if the archive cannot be opened,
// contains no ".csv" entry, or holds malformed CSV. Underlying errors are
// wrapped with %w so callers can inspect them with errors.Is / errors.As.
func parseAlexaZip(data []byte) ([]string, error) {
	zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return nil, fmt.Errorf("failed to open zip file: %w", err)
	}

	// Use the first entry whose name ends in ".csv".
	var csvFile *zip.File
	for _, f := range zr.File {
		if strings.HasSuffix(f.Name, ".csv") {
			csvFile = f
			break
		}
	}
	if csvFile == nil {
		return nil, fmt.Errorf("csv file not found in zip archive")
	}

	rc, err := csvFile.Open()
	if err != nil {
		return nil, fmt.Errorf("failed to open csv file: %w", err)
	}
	defer rc.Close()

	csvReader := csv.NewReader(rc)
	// Each record is consumed before the next Read, so the reader may reuse
	// its record slice instead of allocating one per row.
	csvReader.ReuseRecord = true

	var sites []string
	for {
		record, err := csvReader.Read()
		if err == io.EOF {
			return sites, nil
		}
		if err != nil {
			// Discard anything parsed so far: a partial list is never returned.
			return nil, fmt.Errorf("error reading csv: %w", err)
		}
		// The site name is the second column; skip rows that do not have one.
		if len(record) < 2 {
			continue
		}
		sites = append(sites, "http://"+record[1])
	}
}
// RandomSite returns one pseudo-randomly chosen URL from the list produced by
// FetchAlexaTop1M.
//
// It returns an error if the list cannot be loaded or if the list is empty.
func RandomSite() (string, error) {
	urls, err := FetchAlexaTop1M()
	if err != nil {
		return "", err
	}
	// rand.Intn panics when its argument is 0, so an empty list has to be
	// reported as an error rather than indexed.
	if len(urls) == 0 {
		return "", fmt.Errorf("no sites available")
	}
	return urls[rand.Intn(len(urls))], nil
}
// NthSite looks up the URL at zero-based position n in the list produced by
// FetchAlexaTop1M. It returns an error if the list cannot be loaded or if n
// lies outside [0, len(list)).
func NthSite(n int) (string, error) {
	sites, fetchErr := FetchAlexaTop1M()
	switch {
	case fetchErr != nil:
		return "", fetchErr
	case n >= 0 && n < len(sites):
		return sites[n], nil
	default:
		return "", fmt.Errorf("index out of range")
	}
}
// RandomSites returns n pseudo-randomly chosen entries from the list produced
// by FetchAlexaTop1M, in random order. Entries are drawn without replacement:
// no list position is returned more than once.
//
// It returns an error if the list cannot be loaded, if n is negative, or if n
// exceeds the number of sites available. n == 0 yields an empty slice.
func RandomSites(n int) ([]string, error) {
	urls, err := FetchAlexaTop1M()
	if err != nil {
		return nil, err
	}
	if n < 0 || n > len(urls) {
		return nil, fmt.Errorf("invalid number of sites requested")
	}

	// Floyd's sampling algorithm: picks n distinct indices in O(n) time and
	// memory, without permuting or copying the full list.
	taken := make(map[int]struct{}, n)
	selected := make([]string, 0, n)
	for upper := len(urls) - n; upper < len(urls); upper++ {
		idx := rand.Intn(upper + 1)
		if _, dup := taken[idx]; dup {
			// upper itself cannot have been taken yet, since every earlier
			// pick was strictly below it.
			idx = upper
		}
		taken[idx] = struct{}{}
		selected = append(selected, urls[idx])
	}

	// Floyd's algorithm yields a uniform subset but a biased order, so shuffle.
	rand.Shuffle(len(selected), func(a, b int) {
		selected[a], selected[b] = selected[b], selected[a]
	})
	return selected, nil
}

78
alexatop_test.go Normal file
View File

@ -0,0 +1,78 @@
package alexatop
import (
"testing"
)
// TestFetchAlexaTop1M checks that the embedded list loads without error and
// contains at least 700,000 entries.
func TestFetchAlexaTop1M(t *testing.T) {
	const minSites = 700_000

	sites, loadErr := FetchAlexaTop1M()
	if loadErr != nil {
		t.Fatalf("Expected no error, got %v", loadErr)
	}
	if count := len(sites); count < minSites {
		t.Fatalf("Expected at least 700,000 URLs, got %d", count)
	}
}
// TestRandomSite checks that RandomSite succeeds and yields a non-empty URL.
func TestRandomSite(t *testing.T) {
	picked, pickErr := RandomSite()
	if pickErr != nil {
		t.Fatalf("Expected no error, got %v", pickErr)
	}
	if len(picked) == 0 {
		t.Fatalf("Expected a valid URL, got an empty string")
	}
}
// TestNthSite checks that NthSite agrees with direct indexing into the list
// returned by FetchAlexaTop1M and rejects an index equal to the list length.
func TestNthSite(t *testing.T) {
	const idx = 99

	all, err := FetchAlexaTop1M()
	if err != nil {
		t.Fatalf("Expected no error, got %v", err)
	}

	got, err := NthSite(idx)
	if err != nil {
		t.Fatalf("Expected no error, got %v", err)
	}
	if want := all[idx]; got != want {
		t.Fatalf("Expected %s, got %s", want, got)
	}

	// One past the last valid index must be rejected.
	if _, err = NthSite(len(all)); err == nil {
		t.Fatalf("Expected error for out-of-range index, got none")
	}
}
// TestRandomSites checks that RandomSites returns exactly the requested number
// of URLs and that none of them is empty.
func TestRandomSites(t *testing.T) {
	const want = 5

	picked, err := RandomSites(want)
	if err != nil {
		t.Fatalf("Expected no error, got %v", err)
	}
	if got := len(picked); got != want {
		t.Fatalf("Expected %d random sites, got %d", want, got)
	}
	for i := range picked {
		if picked[i] == "" {
			t.Fatalf("Expected a valid URL, got an empty string")
		}
	}
}
// TestRandomSitesMoreThanAvailable checks that asking RandomSites for one more
// site than the list holds produces an error.
func TestRandomSitesMoreThanAvailable(t *testing.T) {
	all, loadErr := FetchAlexaTop1M()
	if loadErr != nil {
		t.Fatalf("Expected no error, got %v", loadErr)
	}

	tooMany := len(all) + 1
	if _, reqErr := RandomSites(tooMany); reqErr == nil {
		t.Fatalf("Expected error for requesting more sites than available, got none")
	}
}

BIN
backup/alexa.zip Normal file

Binary file not shown.

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module sneak.berlin/go/alexatop
go 1.22.2