2 Commits

Author SHA1 Message Date
user
dedc729f37 refactor: extract magic byte detection into internal/magic package
All checks were successful
check / check (push) Successful in 52s
Move MIMEType, DetectFormat, ValidateMagicBytes, PeekAndValidate,
IsSupportedMIMEType, MIMEToImageFormat, and ImageFormatToMIME from
internal/imgcache into a new internal/magic package.

The magic package defines its own ImageFormat type and constants to
avoid circular imports (imgcache imports magic for validation, so
magic cannot import imgcache).

Update import sites in imgcache/service.go and service_test.go.

Part of issue #39.
2026-03-25 17:46:40 -07:00
e34743f070 refactor: extract whitelist package from internal/imgcache (#41)
All checks were successful
check / check (push) Successful in 4s
Extract `HostWhitelist`, `NewHostWhitelist`, `IsWhitelisted`, `IsEmpty`, and `Count` from `internal/imgcache/` into the new `internal/whitelist/` package.

The whitelist package is completely self-contained, depending only on `net/url` and `strings` from the standard library. No circular imports are introduced.

**Changes:**
- Moved `whitelist.go` → `internal/whitelist/whitelist.go` (added package comment)
- Moved `whitelist_test.go` → `internal/whitelist/whitelist_test.go` (adapted to external test style)
- Updated `internal/imgcache/service.go` to import from `sneak.berlin/go/pixa/internal/whitelist`

`docker build .` passes (lint, tests, build).

Part of [issue #39](#39)

Co-authored-by: clawbot <clawbot@noreply.git.eeqj.de>
Co-authored-by: user <user@Mac.lan guest wan>
Reviewed-on: #41
Co-authored-by: clawbot <clawbot@noreply.example.org>
Co-committed-by: clawbot <clawbot@noreply.example.org>
2026-03-25 20:44:56 +01:00
6 changed files with 58 additions and 35 deletions

View File

@@ -1,25 +1,26 @@
package imgcache // Package allowlist provides host-based URL allow-listing for the image proxy.
package allowlist
import ( import (
"net/url" "net/url"
"strings" "strings"
) )
// HostWhitelist implements the Whitelist interface for checking allowed source hosts. // HostAllowList checks whether source hosts are permitted.
type HostWhitelist struct { type HostAllowList struct {
// exactHosts contains hosts that must match exactly (e.g., "cdn.example.com") // exactHosts contains hosts that must match exactly (e.g., "cdn.example.com")
exactHosts map[string]struct{} exactHosts map[string]struct{}
// suffixHosts contains domain suffixes to match (e.g., ".example.com" matches "cdn.example.com") // suffixHosts contains domain suffixes to match (e.g., ".example.com" matches "cdn.example.com")
suffixHosts []string suffixHosts []string
} }
// NewHostWhitelist creates a whitelist from a list of host patterns. // New creates a HostAllowList from a list of host patterns.
// Patterns starting with "." are treated as suffix matches. // Patterns starting with "." are treated as suffix matches.
// Examples: // Examples:
// - "cdn.example.com" - exact match only // - "cdn.example.com" - exact match only
// - ".example.com" - matches cdn.example.com, images.example.com, etc. // - ".example.com" - matches cdn.example.com, images.example.com, etc.
func NewHostWhitelist(patterns []string) *HostWhitelist { func New(patterns []string) *HostAllowList {
w := &HostWhitelist{ w := &HostAllowList{
exactHosts: make(map[string]struct{}), exactHosts: make(map[string]struct{}),
suffixHosts: make([]string, 0), suffixHosts: make([]string, 0),
} }
@@ -40,8 +41,8 @@ func NewHostWhitelist(patterns []string) *HostWhitelist {
return w return w
} }
// IsWhitelisted checks if a URL's host is in the whitelist. // IsAllowed checks if a URL's host is in the allow list.
func (w *HostWhitelist) IsWhitelisted(u *url.URL) bool { func (w *HostAllowList) IsAllowed(u *url.URL) bool {
if u == nil { if u == nil {
return false return false
} }
@@ -71,12 +72,12 @@ func (w *HostWhitelist) IsWhitelisted(u *url.URL) bool {
return false return false
} }
// IsEmpty returns true if the whitelist has no entries. // IsEmpty returns true if the allow list has no entries.
func (w *HostWhitelist) IsEmpty() bool { func (w *HostAllowList) IsEmpty() bool {
return len(w.exactHosts) == 0 && len(w.suffixHosts) == 0 return len(w.exactHosts) == 0 && len(w.suffixHosts) == 0
} }
// Count returns the total number of whitelist entries. // Count returns the total number of allow list entries.
func (w *HostWhitelist) Count() int { func (w *HostAllowList) Count() int {
return len(w.exactHosts) + len(w.suffixHosts) return len(w.exactHosts) + len(w.suffixHosts)
} }

View File

@@ -1,11 +1,13 @@
package imgcache package allowlist_test
import ( import (
"net/url" "net/url"
"testing" "testing"
"sneak.berlin/go/pixa/internal/allowlist"
) )
func TestHostWhitelist_IsWhitelisted(t *testing.T) { func TestHostAllowList_IsAllowed(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
patterns []string patterns []string
@@ -67,7 +69,7 @@ func TestHostWhitelist_IsWhitelisted(t *testing.T) {
want: true, want: true,
}, },
{ {
name: "empty whitelist", name: "empty allow list",
patterns: []string{}, patterns: []string{},
testURL: "https://cdn.example.com/image.jpg", testURL: "https://cdn.example.com/image.jpg",
want: false, want: false,
@@ -94,7 +96,7 @@ func TestHostWhitelist_IsWhitelisted(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
w := NewHostWhitelist(tt.patterns) w := allowlist.New(tt.patterns)
var u *url.URL var u *url.URL
if tt.testURL != "" { if tt.testURL != "" {
@@ -105,15 +107,15 @@ func TestHostWhitelist_IsWhitelisted(t *testing.T) {
} }
} }
got := w.IsWhitelisted(u) got := w.IsAllowed(u)
if got != tt.want { if got != tt.want {
t.Errorf("IsWhitelisted() = %v, want %v", got, tt.want) t.Errorf("IsAllowed() = %v, want %v", got, tt.want)
} }
}) })
} }
} }
func TestHostWhitelist_IsEmpty(t *testing.T) { func TestHostAllowList_IsEmpty(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
patterns []string patterns []string
@@ -143,7 +145,7 @@ func TestHostWhitelist_IsEmpty(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
w := NewHostWhitelist(tt.patterns) w := allowlist.New(tt.patterns)
if got := w.IsEmpty(); got != tt.want { if got := w.IsEmpty(); got != tt.want {
t.Errorf("IsEmpty() = %v, want %v", got, tt.want) t.Errorf("IsEmpty() = %v, want %v", got, tt.want)
} }
@@ -151,7 +153,7 @@ func TestHostWhitelist_IsEmpty(t *testing.T) {
} }
} }
func TestHostWhitelist_Count(t *testing.T) { func TestHostAllowList_Count(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
patterns []string patterns []string
@@ -181,7 +183,7 @@ func TestHostWhitelist_Count(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
w := NewHostWhitelist(tt.patterns) w := allowlist.New(tt.patterns)
if got := w.Count(); got != tt.want { if got := w.Count(); got != tt.want {
t.Errorf("Count() = %v, want %v", got, tt.want) t.Errorf("Count() = %v, want %v", got, tt.want)
} }

View File

@@ -11,7 +11,9 @@ import (
"time" "time"
"github.com/dustin/go-humanize" "github.com/dustin/go-humanize"
"sneak.berlin/go/pixa/internal/allowlist"
"sneak.berlin/go/pixa/internal/imageprocessor" "sneak.berlin/go/pixa/internal/imageprocessor"
"sneak.berlin/go/pixa/internal/magic"
) )
// Service implements the ImageCache interface, orchestrating cache, fetcher, and processor. // Service implements the ImageCache interface, orchestrating cache, fetcher, and processor.
@@ -20,7 +22,7 @@ type Service struct {
fetcher Fetcher fetcher Fetcher
processor *imageprocessor.ImageProcessor processor *imageprocessor.ImageProcessor
signer *Signer signer *Signer
whitelist *HostWhitelist allowlist *allowlist.HostAllowList
log *slog.Logger log *slog.Logger
allowHTTP bool allowHTTP bool
maxResponseSize int64 maxResponseSize int64
@@ -85,7 +87,7 @@ func NewService(cfg *ServiceConfig) (*Service, error) {
fetcher: fetcher, fetcher: fetcher,
processor: imageprocessor.New(imageprocessor.Params{MaxInputBytes: maxResponseSize}), processor: imageprocessor.New(imageprocessor.Params{MaxInputBytes: maxResponseSize}),
signer: signer, signer: signer,
whitelist: NewHostWhitelist(cfg.Whitelist), allowlist: allowlist.New(cfg.Whitelist),
log: log, log: log,
allowHTTP: allowHTTP, allowHTTP: allowHTTP,
maxResponseSize: maxResponseSize, maxResponseSize: maxResponseSize,
@@ -276,7 +278,7 @@ func (s *Service) fetchAndProcess(
) )
// Validate magic bytes match content type // Validate magic bytes match content type
if err := ValidateMagicBytes(sourceData, fetchResult.ContentType); err != nil { if err := magic.ValidateMagicBytes(sourceData, fetchResult.ContentType); err != nil {
return nil, fmt.Errorf("content validation failed: %w", err) return nil, fmt.Errorf("content validation failed: %w", err)
} }
@@ -381,7 +383,7 @@ func (s *Service) Stats(ctx context.Context) (*CacheStats, error) {
// ValidateRequest validates the request signature if required. // ValidateRequest validates the request signature if required.
func (s *Service) ValidateRequest(req *ImageRequest) error { func (s *Service) ValidateRequest(req *ImageRequest) error {
// Check if host is whitelisted (no signature required) // Check if host is allowed (no signature required)
sourceURL := req.SourceURL() sourceURL := req.SourceURL()
parsedURL, err := url.Parse(sourceURL) parsedURL, err := url.Parse(sourceURL)
@@ -389,11 +391,11 @@ func (s *Service) ValidateRequest(req *ImageRequest) error {
return fmt.Errorf("invalid source URL: %w", err) return fmt.Errorf("invalid source URL: %w", err)
} }
if s.whitelist.IsWhitelisted(parsedURL) { if s.allowlist.IsAllowed(parsedURL) {
return nil return nil
} }
// Signature required for non-whitelisted hosts // Signature required for non-allowed hosts
return s.signer.Verify(req) return s.signer.Verify(req)
} }

View File

@@ -5,6 +5,8 @@ import (
"io" "io"
"testing" "testing"
"time" "time"
"sneak.berlin/go/pixa/internal/magic"
) )
func TestService_Get_WhitelistedHost(t *testing.T) { func TestService_Get_WhitelistedHost(t *testing.T) {
@@ -315,17 +317,17 @@ func TestService_Get_FormatConversion(t *testing.T) {
t.Fatalf("failed to read response: %v", err) t.Fatalf("failed to read response: %v", err)
} }
detectedMIME, err := DetectFormat(data) detectedMIME, err := magic.DetectFormat(data)
if err != nil { if err != nil {
t.Fatalf("failed to detect format: %v", err) t.Fatalf("failed to detect format: %v", err)
} }
expectedFormat, ok := MIMEToImageFormat(tt.wantMIME) expectedFormat, ok := magic.MIMEToImageFormat(tt.wantMIME)
if !ok { if !ok {
t.Fatalf("unknown format for MIME type: %s", tt.wantMIME) t.Fatalf("unknown format for MIME type: %s", tt.wantMIME)
} }
detectedFormat, ok := MIMEToImageFormat(string(detectedMIME)) detectedFormat, ok := magic.MIMEToImageFormat(string(detectedMIME))
if !ok { if !ok {
t.Fatalf("unknown format for detected MIME type: %s", detectedMIME) t.Fatalf("unknown format for detected MIME type: %s", detectedMIME)
} }

View File

@@ -1,4 +1,6 @@
package imgcache // Package magic detects image formats from magic bytes and validates
// content against declared MIME types.
package magic
import ( import (
"bytes" "bytes"
@@ -27,6 +29,20 @@ const (
MIMETypeSVG = MIMEType("image/svg+xml") MIMETypeSVG = MIMEType("image/svg+xml")
) )
// ImageFormat represents supported output image formats.
// This mirrors the type in imgcache to avoid circular imports.
type ImageFormat string
// Supported image output formats.
const (
FormatOriginal ImageFormat = "orig"
FormatJPEG ImageFormat = "jpeg"
FormatPNG ImageFormat = "png"
FormatWebP ImageFormat = "webp"
FormatAVIF ImageFormat = "avif"
FormatGIF ImageFormat = "gif"
)
// MinMagicBytes is the minimum number of bytes needed to detect format. // MinMagicBytes is the minimum number of bytes needed to detect format.
const MinMagicBytes = 12 const MinMagicBytes = 12
@@ -189,7 +205,7 @@ func PeekAndValidate(r io.Reader, declaredType string) (io.Reader, error) {
return io.MultiReader(bytes.NewReader(buf), r), nil return io.MultiReader(bytes.NewReader(buf), r), nil
} }
// MIMEToImageFormat converts a MIME type to our ImageFormat type. // MIMEToImageFormat converts a MIME type to an ImageFormat.
func MIMEToImageFormat(mimeType string) (ImageFormat, bool) { func MIMEToImageFormat(mimeType string) (ImageFormat, bool) {
normalized := normalizeMIMEType(mimeType) normalized := normalizeMIMEType(mimeType)
switch MIMEType(normalized) { switch MIMEType(normalized) {
@@ -208,7 +224,7 @@ func MIMEToImageFormat(mimeType string) (ImageFormat, bool) {
} }
} }
// ImageFormatToMIME converts our ImageFormat to a MIME type string. // ImageFormatToMIME converts an ImageFormat to a MIME type string.
func ImageFormatToMIME(format ImageFormat) string { func ImageFormatToMIME(format ImageFormat) string {
switch format { switch format {
case FormatJPEG: case FormatJPEG:

View File

@@ -1,4 +1,4 @@
package imgcache package magic
import ( import (
"bytes" "bytes"