From c69ddf6f6127f2b0d40d9c11a18e6eabc55c76be Mon Sep 17 00:00:00 2001
From: sneak
Date: Thu, 8 Jan 2026 02:55:05 -0800
Subject: [PATCH] Implement URL parser for image proxy routes

---
 internal/imgcache/urlparser.go      | 187 ++++++++++++++++++++++++
 internal/imgcache/urlparser_test.go | 218 ++++++++++++++++++++++++++++
 2 files changed, 405 insertions(+)
 create mode 100644 internal/imgcache/urlparser.go
 create mode 100644 internal/imgcache/urlparser_test.go

diff --git a/internal/imgcache/urlparser.go b/internal/imgcache/urlparser.go
new file mode 100644
index 0000000..7bf70b9
--- /dev/null
+++ b/internal/imgcache/urlparser.go
@@ -0,0 +1,187 @@
+package imgcache
+
+import (
+	"errors"
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// Sentinel errors returned by the URL parser; match them with errors.Is.
+var (
+	ErrInvalidPath       = errors.New("invalid image path")
+	ErrMissingHost       = errors.New("missing source host")
+	ErrMissingSize       = errors.New("missing size specification")
+	ErrInvalidSize       = errors.New("invalid size format")
+	ErrInvalidFormat     = errors.New("invalid or unsupported format")
+	ErrDimensionTooLarge = errors.New("dimension exceeds maximum")
+)
+
+// MaxDimension is the maximum allowed width or height.
+const MaxDimension = 8192
+
+// sizeFormatRegex matches "<W>x<H>.<ext>" (groups 1-3) or "orig.<ext>" (groups 4-5), e.g. "800x600.webp", "orig.png".
+var sizeFormatRegex = regexp.MustCompile(`^(\d+)x(\d+)\.(\w+)$|^(orig)\.(\w+)$`)
+
+// ParsedURL contains the parsed components of an image proxy URL.
+type ParsedURL struct {
+	// Host is the source origin host (e.g., "cdn.example.com").
+	Host string
+	// Path is the path on the origin, always starting with "/" (e.g., "/photos/cat.jpg").
+	Path string
+	// Query is the optional query string for the origin, without the leading "?".
+	Query string
+	// Size is the requested output dimensions ("orig" parses to 0x0).
+	Size Size
+	// Format is the requested output format.
+	Format ImageFormat
+}
+
+// ParseImageURL parses a URL path like /v1/image/<host>/<path>/<size>.<format>.
+// Examples:
+//   - /v1/image/cdn.example.com/photos/cat.jpg/800x600.webp
+//   - /v1/image/cdn.example.com/photos/cat.jpg/0x0.jpeg
+//   - /v1/image/cdn.example.com/photos/cat.jpg/orig.png
+//   - /v1/image/cdn.example.com/photos/cat.jpg?q=1/800x600.webp
+//
+// The final segment selects the output size and format, the first segment
+// after the prefix is the origin host, and everything in between is the
+// origin path with an optional "?query" suffix; a host with no path gets
+// Path "/". It returns ErrInvalidPath when the prefix is absent,
+// ErrMissingHost or ErrMissingSize when a segment is missing, and the
+// parseSizeFormat error when the final segment is malformed.
+func ParseImageURL(urlPath string) (*ParsedURL, error) {
+	// Strip the route prefix; anything else is not an image proxy path.
+	const prefix = "/v1/image/"
+	if !strings.HasPrefix(urlPath, prefix) {
+		return nil, ErrInvalidPath
+	}
+
+	remainder := urlPath[len(prefix):]
+	if remainder == "" {
+		return nil, ErrMissingHost
+	}
+
+	// The last segment carries "<size>.<format>"; everything before it
+	// identifies the origin resource.
+	lastSlash := strings.LastIndex(remainder, "/")
+	if lastSlash == -1 {
+		return nil, ErrMissingSize
+	}
+
+	origin := remainder[:lastSlash]
+	if origin == "" {
+		return nil, ErrMissingHost
+	}
+
+	// Parse size and format from the last segment.
+	size, format, err := parseSizeFormat(remainder[lastSlash+1:])
+	if err != nil {
+		return nil, err
+	}
+
+	// Split the query off before looking for the host: with no origin path
+	// (e.g. "cdn.example.com?v=2") the "?" sits in the first segment and
+	// would otherwise end up inside Host.
+	var query string
+	if q := strings.Index(origin, "?"); q != -1 {
+		origin, query = origin[:q], origin[q+1:]
+	}
+
+	// The first segment is the host; the rest, if any, is the origin path.
+	host, path := origin, "/"
+	if slash := strings.Index(origin, "/"); slash != -1 {
+		host, path = origin[:slash], origin[slash:]
+	}
+
+	if host == "" {
+		return nil, ErrMissingHost
+	}
+
+	return &ParsedURL{
+		Host:   host,
+		Path:   path,
+		Query:  query,
+		Size:   size,
+		Format: format,
+	}, nil
+}
+
+// parseSizeFormat parses a final URL segment such as "800x600.webp" or
+// "orig.png". "orig" yields the zero Size. It returns ErrInvalidSize when s
+// does not match sizeFormatRegex, ErrDimensionTooLarge when a dimension
+// exceeds MaxDimension (including values too large for an int), and the
+// parseFormat error when the extension is not a known format.
+func parseSizeFormat(s string) (Size, ImageFormat, error) {
+	matches := sizeFormatRegex.FindStringSubmatch(s)
+	if matches == nil {
+		return Size{}, "", ErrInvalidSize
+	}
+
+	// Groups 4 and 5 are set only by the "orig.<format>" alternative.
+	if matches[4] == "orig" {
+		format, err := parseFormat(matches[5])
+		if err != nil {
+			return Size{}, "", err
+		}
+
+		return Size{}, format, nil
+	}
+
+	// Groups 1 and 2 are the width and height digits.
+	var dims [2]int
+	for i, digits := range matches[1:3] {
+		n, err := strconv.Atoi(digits)
+		switch {
+		case errors.Is(err, strconv.ErrRange):
+			// Too many digits for an int is "too large", not malformed.
+			return Size{}, "", ErrDimensionTooLarge
+		case err != nil:
+			return Size{}, "", ErrInvalidSize
+		case n > MaxDimension:
+			return Size{}, "", ErrDimensionTooLarge
+		}
+
+		dims[i] = n
+	}
+
+	format, err := parseFormat(matches[3])
+	if err != nil {
+		return Size{}, "", err
+	}
+
+	return Size{Width: dims[0], Height: dims[1]}, format, nil
+}
+
+// parseFormat maps a case-insensitive format name or file extension to its
+// ImageFormat. Unknown names yield an error wrapping ErrInvalidFormat.
+func parseFormat(s string) (ImageFormat, error) {
+	switch strings.ToLower(s) {
+	case "orig", "original":
+		return FormatOriginal, nil
+	case "jpg", "jpeg":
+		return FormatJPEG, nil
+	case "png":
+		return FormatPNG, nil
+	case "webp":
+		return FormatWebP, nil
+	case "avif":
+		return FormatAVIF, nil
+	case "gif":
+		return FormatGIF, nil
+	default:
+		return "", fmt.Errorf("%w: %s", ErrInvalidFormat, s)
+	}
+}
+
+// ToImageRequest converts a ParsedURL to an ImageRequest.
+func (p *ParsedURL) ToImageRequest() *ImageRequest {
+	return &ImageRequest{
+		SourceHost:  p.Host,
+		SourcePath:  p.Path,
+		SourceQuery: p.Query,
+		Size:        p.Size,
+		Format:      p.Format,
+	}
+}
diff --git a/internal/imgcache/urlparser_test.go b/internal/imgcache/urlparser_test.go
new file mode 100644
index 0000000..136d277
--- /dev/null
+++ b/internal/imgcache/urlparser_test.go
@@ -0,0 +1,218 @@
+package imgcache
+
+import (
+	"errors"
+	"testing"
+)
+
+// TestParseImageURL drives ParseImageURL with a table of valid and invalid
+// paths, comparing each parsed field or the returned sentinel error.
+func TestParseImageURL(t *testing.T) {
+	tests := []struct {
+		name    string
+		input   string
+		want    *ParsedURL
+		wantErr error
+	}{
+		{
+			name:  "basic path with size",
+			input: "/v1/image/cdn.example.com/photos/cat.jpg/800x600.webp",
+			want: &ParsedURL{
+				Host:   "cdn.example.com",
+				Path:   "/photos/cat.jpg",
+				Query:  "",
+				Size:   Size{Width: 800, Height: 600},
+				Format: FormatWebP,
+			},
+		},
+		{
+			name:  "original size with 0x0",
+			input: "/v1/image/cdn.example.com/photos/cat.jpg/0x0.jpeg",
+			want: &ParsedURL{
+				Host:   "cdn.example.com",
+				Path:   "/photos/cat.jpg",
+				Query:  "",
+				Size:   Size{Width: 0, Height: 0},
+				Format: FormatJPEG,
+			},
+		},
+		{
+			name:  "original size with orig keyword",
+			input: "/v1/image/cdn.example.com/photos/cat.jpg/orig.png",
+			want: &ParsedURL{
+				Host:   "cdn.example.com",
+				Path:   "/photos/cat.jpg",
+				Query:  "",
+				Size:   Size{Width: 0, Height: 0},
+				Format: FormatPNG,
+			},
+		},
+		{
+			name:  "path with query string",
+			input: "/v1/image/cdn.example.com/photos/cat.jpg?arg1=val1&arg2=val2/800x600.webp",
+			want: &ParsedURL{
+				Host:   "cdn.example.com",
+				Path:   "/photos/cat.jpg",
+				Query:  "arg1=val1&arg2=val2",
+				Size:   Size{Width: 800, Height: 600},
+				Format: FormatWebP,
+			},
+		},
+		{
+			name:  "deep nested path",
+			input: "/v1/image/cdn.example.com/a/b/c/d/image.jpg/1920x1080.avif",
+			want: &ParsedURL{
+				Host:   "cdn.example.com",
+				Path:   "/a/b/c/d/image.jpg",
+				Query:  "",
+				Size:   Size{Width: 1920, Height: 1080},
+				Format: FormatAVIF,
+			},
+		},
+		{
+			name:  "jpg alias for jpeg",
+			input: "/v1/image/example.com/img.png/100x100.jpg",
+			want: &ParsedURL{
+				Host:   "example.com",
+				Path:   "/img.png",
+				Query:  "",
+				Size:   Size{Width: 100, Height: 100},
+				Format: FormatJPEG,
+			},
+		},
+		{
+			name:  "gif format",
+			input: "/v1/image/example.com/animated.gif/200x200.gif",
+			want: &ParsedURL{
+				Host:   "example.com",
+				Path:   "/animated.gif",
+				Query:  "",
+				Size:   Size{Width: 200, Height: 200},
+				Format: FormatGIF,
+			},
+		},
+		{
+			name:  "host only with size",
+			input: "/v1/image/cdn.example.com/800x600.webp",
+			want: &ParsedURL{
+				Host:   "cdn.example.com",
+				Path:   "/",
+				Query:  "",
+				Size:   Size{Width: 800, Height: 600},
+				Format: FormatWebP,
+			},
+		},
+		{
+			name:    "missing prefix",
+			input:   "/image/cdn.example.com/photo.jpg/800x600.webp",
+			wantErr: ErrInvalidPath,
+		},
+		{
+			name:    "empty path after prefix",
+			input:   "/v1/image/",
+			wantErr: ErrMissingHost,
+		},
+		{
+			name:    "host only, no path or size",
+			input:   "/v1/image/cdn.example.com",
+			wantErr: ErrMissingSize,
+		},
+		{
+			name:    "invalid size format",
+			input:   "/v1/image/cdn.example.com/photo.jpg/invalid.webp",
+			wantErr: ErrInvalidSize,
+		},
+		{
+			name:    "unsupported format",
+			input:   "/v1/image/cdn.example.com/photo.jpg/800x600.bmp",
+			wantErr: ErrInvalidFormat,
+		},
+		{
+			name:    "dimension too large",
+			input:   "/v1/image/cdn.example.com/photo.jpg/10000x600.webp",
+			wantErr: ErrDimensionTooLarge,
+		},
+		{
+			name:    "empty host segment",
+			input:   "/v1/image//photo.jpg/800x600.webp",
+			wantErr: ErrMissingHost,
+		},
+		{
+			name:    "empty size segment",
+			input:   "/v1/image/cdn.example.com/photo.jpg/",
+			wantErr: ErrInvalidSize,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := ParseImageURL(tt.input)
+
+			if tt.wantErr != nil {
+				if err == nil {
+					t.Errorf("ParseImageURL() error = nil, wantErr %v", tt.wantErr)
+
+					return
+				}
+				// errors.Is follows %w wrapping, so wrapped sentinels match too.
+				if !errors.Is(err, tt.wantErr) {
+					t.Errorf("ParseImageURL() error = %v, wantErr %v", err, tt.wantErr)
+				}
+
+				return
+			}
+
+			if err != nil {
+				t.Errorf("ParseImageURL() unexpected error = %v", err)
+
+				return
+			}
+
+			if got.Host != tt.want.Host {
+				t.Errorf("Host = %q, want %q", got.Host, tt.want.Host)
+			}
+			if got.Path != tt.want.Path {
+				t.Errorf("Path = %q, want %q", got.Path, tt.want.Path)
+			}
+			if got.Query != tt.want.Query {
+				t.Errorf("Query = %q, want %q", got.Query, tt.want.Query)
+			}
+			if got.Size != tt.want.Size {
+				t.Errorf("Size = %v, want %v", got.Size, tt.want.Size)
+			}
+			if got.Format != tt.want.Format {
+				t.Errorf("Format = %q, want %q", got.Format, tt.want.Format)
+			}
+		})
+	}
+}
+
+// TestParsedURL_ToImageRequest checks that ToImageRequest copies every
+// ParsedURL field into the corresponding ImageRequest field.
+func TestParsedURL_ToImageRequest(t *testing.T) {
+	parsed := &ParsedURL{
+		Host:   "cdn.example.com",
+		Path:   "/photos/cat.jpg",
+		Query:  "version=2",
+		Size:   Size{Width: 800, Height: 600},
+		Format: FormatWebP,
+	}
+
+	req := parsed.ToImageRequest()
+
+	if req.SourceHost != parsed.Host {
+		t.Errorf("SourceHost = %q, want %q", req.SourceHost, parsed.Host)
+	}
+	if req.SourcePath != parsed.Path {
+		t.Errorf("SourcePath = %q, want %q", req.SourcePath, parsed.Path)
+	}
+	if req.SourceQuery != parsed.Query {
+		t.Errorf("SourceQuery = %q, want %q", req.SourceQuery, parsed.Query)
+	}
+	if req.Size != parsed.Size {
+		t.Errorf("Size = %v, want %v", req.Size, parsed.Size)
+	}
+	if req.Format != parsed.Format {
+		t.Errorf("Format = %q, want %q", req.Format, parsed.Format)
+	}
+}