commit de3fdccabab44937d7af05ae343e0cd7e6c18347
Author: sneak
Date:   Sun Jun 2 14:31:12 2024 -0700

    initial

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..33a9488
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+example
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..33a9488
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+example
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3ccf9a2
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,37 @@
+# First stage: Use the golangci-lint image to run the linter
+FROM golangci/golangci-lint:latest AS lint
+
+# Set the Current Working Directory inside the container
+WORKDIR /app
+
+# Copy the go.mod file and the rest of the application code
+COPY go.mod ./
+COPY . .
+
+# Run golangci-lint
+RUN golangci-lint run
+
+# Second stage: Use the official Golang image to run tests
+FROM golang:1.22 AS test
+
+# Set the Current Working Directory inside the container
+WORKDIR /app
+
+# Copy the go.mod file and the rest of the application code
+COPY go.mod ./
+COPY . .
+
+# Run tests
+RUN go test -v ./...
+
+# Final stage: Combine the linting and testing stages
+FROM golang:1.22 AS final
+
+# Ensure that the linting stage succeeded
+WORKDIR /app
+COPY --from=lint /app .
+COPY --from=test /app .
+
+# Set the final CMD to something minimal since we only needed to verify lint and tests during build
+CMD ["echo", "Build and tests passed successfully!"]
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6b3b7de
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+# Targets
+.PHONY: all run test clean docker lint
+
+all: run
+
+example: *.go ./cmd/example/*.go
+	go build -o $@ ./cmd/example/main.go
+
+run: example
+	./example
+
+test:
+	go test -v ./...
+
+clean:
+	rm -f example
+
+docker:
+	docker build --progress plain .
+
+lint:
+	golangci-lint run
diff --git a/alexatop.go b/alexatop.go
new file mode 100644
index 0000000..1bdff86
--- /dev/null
+++ b/alexatop.go
@@ -0,0 +1,151 @@
+// Package alexatop provides access to the Alexa Top 1M site list that is
+// embedded in the package as a zip archive.
+package alexatop
+
+import (
+	"archive/zip"
+	"bytes"
+	_ "embed"
+	"encoding/csv"
+	"fmt"
+	"io"
+	"math/rand"
+	"strings"
+	"sync"
+)
+
+//go:embed backup/alexa.zip
+var alexaZip []byte
+
+var (
+	urls       []string
+	fetchError error
+	once       sync.Once
+)
+
+// FetchAlexaTop1M returns the URLs parsed from the embedded zip archive.
+//
+// The archive is parsed once, on the first call; the resulting slice and error
+// are cached and returned by every later call. The returned slice is shared
+// between callers and must not be modified.
+func FetchAlexaTop1M() ([]string, error) {
+	once.Do(func() {
+		// Step 1: Open the zip file from the embedded data
+		r, err := zip.NewReader(bytes.NewReader(alexaZip), int64(len(alexaZip)))
+		if err != nil {
+			fetchError = fmt.Errorf("failed to open zip file: %w", err)
+			return
+		}
+
+		// Step 2: Locate the first CSV file within the zip
+		var csvFile *zip.File
+		for _, f := range r.File {
+			if strings.HasSuffix(f.Name, ".csv") {
+				csvFile = f
+				break
+			}
+		}
+		if csvFile == nil {
+			fetchError = fmt.Errorf("csv file not found in zip archive")
+			return
+		}
+
+		rc, err := csvFile.Open()
+		if err != nil {
+			fetchError = fmt.Errorf("failed to open csv file: %w", err)
+			return
+		}
+		defer rc.Close()
+
+		// Step 3: Build a URL from the second column of every CSV record
+		csvReader := csv.NewReader(rc)
+		var fetchedUrls []string
+		for {
+			record, err := csvReader.Read()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				fetchError = fmt.Errorf("error reading csv: %w", err)
+				return
+			}
+			if len(record) < 2 {
+				continue
+			}
+			fetchedUrls = append(fetchedUrls, "http://"+record[1])
+		}
+
+		urls = fetchedUrls
+	})
+
+	return urls, fetchError
+}
+
+// RandomSite returns one URL chosen at random from the list of Alexa Top 1M
+// URLs. It returns an error if the list cannot be loaded or is empty.
+func RandomSite() (string, error) {
+	sites, err := FetchAlexaTop1M()
+	if err != nil {
+		return "", err
+	}
+
+	// rand.Intn panics when its argument is zero, so reject an empty list.
+	if len(sites) == 0 {
+		return "", fmt.Errorf("no sites available")
+	}
+
+	return sites[rand.Intn(len(sites))], nil
+}
+
+// NthSite returns the URL at zero-based index n in the list of Alexa Top 1M
+// URLs. It returns an error if n is negative or not less than the list length.
+func NthSite(n int) (string, error) {
+	sites, err := FetchAlexaTop1M()
+	if err != nil {
+		return "", err
+	}
+
+	if n < 0 || n >= len(sites) {
+		return "", fmt.Errorf("index out of range")
+	}
+
+	return sites[n], nil
+}
+
+// RandomSites returns n distinct entries chosen at random from the list of
+// Alexa Top 1M URLs. It returns an error if n is negative or exceeds the
+// number of entries in the list.
+func RandomSites(n int) ([]string, error) {
+	sites, err := FetchAlexaTop1M()
+	if err != nil {
+		return nil, err
+	}
+
+	if n < 0 || n > len(sites) {
+		return nil, fmt.Errorf("invalid number of sites requested")
+	}
+
+	// Sparse partial Fisher-Yates shuffle: moved records only the positions
+	// that have been swapped, so drawing n entries never permutes the full
+	// list and no index is picked twice.
+	moved := make(map[int]int, n)
+	selected := make([]string, n)
+	for i := range selected {
+		j := i + rand.Intn(len(sites)-i)
+
+		pick, ok := moved[j]
+		if !ok {
+			pick = j
+		}
+		selected[i] = sites[pick]
+
+		// Slot j takes over the value of slot i, which is never visited again.
+		if v, ok := moved[i]; ok {
+			moved[j] = v
+		} else {
+			moved[j] = i
+		}
+	}
+
+	return selected, nil
+}
diff --git a/alexatop_test.go b/alexatop_test.go
new file mode 100644
index 0000000..79f06ac
--- /dev/null
+++ b/alexatop_test.go
@@ -0,0 +1,78 @@
+package alexatop
+
+import (
+	"testing"
+)
+
+func TestFetchAlexaTop1M(t *testing.T) {
+	urls, err := FetchAlexaTop1M()
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if len(urls) < 700_000 {
+		t.Fatalf("Expected at least 700,000 URLs, got %d", len(urls))
+	}
+}
+
+func TestRandomSite(t *testing.T) {
+	site, err := RandomSite()
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if site == "" {
+		t.Fatalf("Expected a valid URL, got an empty string")
+	}
+}
+
+func TestNthSite(t *testing.T) {
+	urls, err := FetchAlexaTop1M()
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	site, err := NthSite(99)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if site != urls[99] {
+		t.Fatalf("Expected %s, got %s", urls[99], site)
+	}
+
+	_, err = NthSite(len(urls))
+	if err == nil {
+		t.Fatalf("Expected error for out-of-range index, got none")
+	}
+}
+
+func TestRandomSites(t *testing.T) {
+	n := 5
+	randomSites, err := RandomSites(n)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if len(randomSites) != n {
+		t.Fatalf("Expected %d random sites, got %d", n, len(randomSites))
+	}
+
+	for _, site := range randomSites {
+		if site == "" {
+			t.Fatalf("Expected a valid URL, got an empty string")
+		}
+	}
+}
+
+func TestRandomSitesMoreThanAvailable(t *testing.T) {
+	urls, err := FetchAlexaTop1M()
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	_, err = RandomSites(len(urls) + 1)
+	if err == nil {
+		t.Fatalf("Expected error for requesting more sites than available, got none")
+	}
+}
diff --git a/backup/alexa.zip b/backup/alexa.zip
new file mode 100644
index 0000000..0154db0
Binary files /dev/null and b/backup/alexa.zip differ
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..72580c9
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module sneak.berlin/go/alexatop
+
+go 1.22.2