Files
pixa/internal/imgcache/urlparser.go
sneak 95408e68d4 Implement max input dimensions and path traversal validation
- Reject input images exceeding MaxInputDimension (8192px) to prevent DoS
- Detect path traversal: ../, encoded variants, backslashes, null bytes
2026-01-08 08:50:18 -08:00

270 lines
6.7 KiB
Go

package imgcache
import (
"errors"
"fmt"
"net/url"
"regexp"
"strconv"
"strings"
)
// URL parsing errors.
var (
	// ErrInvalidPath is returned by ParseImageURL when the path does not
	// start with the "/v1/image/" prefix.
	ErrInvalidPath = errors.New("invalid image path")
	// ErrMissingHost is returned when nothing follows the prefix or leading
	// slash, or when the host segment of the URL is empty.
	ErrMissingHost = errors.New("missing source host")
	// ErrMissingSize is returned when the input contains no "/" and so has
	// no trailing "<size>.<format>" segment.
	ErrMissingSize = errors.New("missing size specification")
	// ErrInvalidSize is returned when the trailing segment is not a
	// well-formed "<W>x<H>.<format>" or "orig.<format>" specification.
	ErrInvalidSize = errors.New("invalid size format")
	// ErrInvalidFormat is wrapped by parseFormat when the format name is not
	// one of the recognised output formats.
	ErrInvalidFormat = errors.New("invalid or unsupported format")
	// ErrDimensionTooLarge is returned when a requested width or height is
	// greater than MaxDimension.
	ErrDimensionTooLarge = errors.New("dimension exceeds maximum")
	// ErrPathTraversal is returned by checkPathTraversal for inputs that
	// contain parent-directory sequences, null bytes, or malformed
	// percent-encoding.
	ErrPathTraversal = errors.New("path traversal detected")
)
// MaxDimension is the maximum allowed width or height of a size requested
// in an image URL; parseSizeFormat rejects larger values with
// ErrDimensionTooLarge.
const MaxDimension = 8192
// sizeFormatRegex matches the final URL segment, e.g. "800x600.webp",
// "0x0.jpeg" or "orig.png".
//
// Capture groups:
//   - 1, 2, 3: width, height and format of the "<W>x<H>.<format>" form
//   - 4, 5: the literal "orig" and the format of the "orig.<format>" form
//
// The format group accepts any run of word characters; whether the name is
// actually supported is decided later by parseFormat.
var sizeFormatRegex = regexp.MustCompile(`^(\d+)x(\d+)\.(\w+)$|^(orig)\.(\w+)$`)
// ParsedURL contains the parsed components of an image proxy URL of the
// form <host>/<path>/<size>.<format>.
type ParsedURL struct {
	// Host is the source origin host (e.g., "cdn.example.com"): the first
	// slash-separated segment of the input. It is never empty in a value
	// returned by the parsers in this file.
	Host string
	// Path is the path on the origin (e.g., "/photos/cat.jpg"). It always
	// begins with "/" and is exactly "/" when the URL names only a host.
	Path string
	// Query is the optional query string for the origin: the text after the
	// first "?" in the path, without the "?" itself. Empty when absent.
	Query string
	// Size is the requested output dimensions. Width and Height are both 0
	// when the URL used the "orig" size.
	Size Size
	// Format is the requested output format
	Format ImageFormat
}
// ParseImagePath parses a chi wildcard capture of the form
// <host>/<path>/<size>.<format>. It is the primary entry point when routing
// with chi.
//
// A single leading "/" is tolerated and removed, because chi may include it
// in the captured value. If nothing remains after that, ErrMissingHost is
// returned; otherwise the result of parseImageComponents is returned as-is.
//
// Examples:
//   - cdn.example.com/photos/cat.jpg/800x600.webp
//   - cdn.example.com/photos/cat.jpg/0x0.jpeg
//   - cdn.example.com/photos/cat.jpg/orig.png
func ParseImagePath(path string) (*ParsedURL, error) {
	remainder := strings.TrimPrefix(path, "/")
	if len(remainder) == 0 {
		return nil, ErrMissingHost
	}

	return parseImageComponents(remainder)
}
// ParseImageURL parses a full URL path such as
// /v1/image/<host>/<path>/<size>.<format>.
//
// It returns ErrInvalidPath when urlPath does not begin with "/v1/image/"
// and ErrMissingHost when nothing follows that prefix. Anything else is
// handed to parseImageComponents. Prefer ParseImagePath when the input comes
// from chi's wildcard capture.
func ParseImageURL(urlPath string) (*ParsedURL, error) {
	const routePrefix = "/v1/image/"

	rest, hasPrefix := strings.CutPrefix(urlPath, routePrefix)
	switch {
	case !hasPrefix:
		return nil, ErrInvalidPath
	case rest == "":
		return nil, ErrMissingHost
	}

	return parseImageComponents(rest)
}
// parseImageComponents breaks "<host>/<path>/<size>.<format>" into a
// ParsedURL.
//
// Checks run in this order, and the first failure is returned:
//  1. checkPathTraversal on the whole input
//  2. ErrMissingSize if the input has no "/" at all
//  3. ErrMissingHost if nothing precedes the final "/"
//  4. any error from parseSizeFormat on the final segment
//  5. ErrMissingHost if the first segment (the host) is empty
//
// The returned Path always begins with "/" (it is "/" when only a host was
// given), and Query holds whatever follows the first "?" in the path.
func parseImageComponents(remainder string) (*ParsedURL, error) {
	// Reject traversal attempts before the input is sliced apart.
	if err := checkPathTraversal(remainder); err != nil {
		return nil, err
	}

	// The trailing segment carries "<size>.<format>"; everything in front of
	// it identifies the origin resource.
	sep := strings.LastIndex(remainder, "/")
	if sep < 0 {
		return nil, ErrMissingSize
	}
	origin, spec := remainder[:sep], remainder[sep+1:]
	if origin == "" {
		return nil, ErrMissingHost
	}

	size, format, err := parseSizeFormat(spec)
	if err != nil {
		return nil, err
	}

	// The host is the first segment; the rest, if any, is the origin path.
	host, rest, _ := strings.Cut(origin, "/")
	if host == "" {
		return nil, ErrMissingHost
	}

	// Re-attach the slash consumed by Cut so the path is always rooted, then
	// peel off an optional query string at the first "?".
	originPath, query, _ := strings.Cut("/"+rest, "?")

	parsed := &ParsedURL{
		Host:   host,
		Path:   originPath,
		Query:  query,
		Size:   size,
		Format: format,
	}
	return parsed, nil
}
// checkPathTraversal detects path traversal attempts in a URL path.
// It checks for various attack vectors including:
// - Direct ../ sequences
// - URL-encoded variants (%2e%2e, %252e%252e)
// - Backslash variants (..\)
// - Null byte injection (%00)
func checkPathTraversal(path string) error {
// First, URL-decode the path to catch encoded attacks
// Decode multiple times to catch double-encoding
decoded := path
for range 3 {
newDecoded, err := url.PathUnescape(decoded)
if err != nil {
// Malformed encoding is suspicious
return ErrPathTraversal
}
if newDecoded == decoded {
break
}
decoded = newDecoded
}
// Normalize backslashes to forward slashes
normalized := strings.ReplaceAll(decoded, "\\", "/")
// Check for null bytes
if strings.Contains(normalized, "\x00") {
return ErrPathTraversal
}
// Check for parent directory traversal
// Look for "/.." or "../" patterns
if strings.Contains(normalized, "/../") ||
strings.Contains(normalized, "/..") ||
strings.HasPrefix(normalized, "../") ||
strings.HasSuffix(normalized, "/..") ||
normalized == ".." {
return ErrPathTraversal
}
// Also check for ".." as a path segment in the original path
// This catches cases where the path hasn't been normalized
segments := strings.Split(path, "/")
for _, seg := range segments {
// URL decode the segment
decodedSeg, _ := url.PathUnescape(seg)
decodedSeg = strings.ReplaceAll(decodedSeg, "\\", "/")
if decodedSeg == ".." {
return ErrPathTraversal
}
}
return nil
}
// parseSizeFormat parses the final URL segment, such as "800x600.webp" or
// "orig.png", into a Size and an ImageFormat.
//
// For the "orig.<format>" form the returned Size is the zero value. For the
// "<W>x<H>.<format>" form both dimensions must be at most MaxDimension.
//
// Errors:
//   - ErrInvalidSize when s does not match either form
//   - ErrDimensionTooLarge when a dimension exceeds MaxDimension, including
//     digit runs too long to fit in an int
//   - whatever parseFormat returns for an unrecognised format name
//
// Dimensions are validated before the format name, so an oversized request
// with an unknown format reports ErrDimensionTooLarge.
func parseSizeFormat(s string) (Size, ImageFormat, error) {
	matches := sizeFormatRegex.FindStringSubmatch(s)
	if matches == nil {
		return Size{}, "", ErrInvalidSize
	}

	var (
		size      Size
		formatStr string
	)
	if matches[4] == "orig" {
		// "orig.<format>": keep the source dimensions, signalled by a zero Size.
		formatStr = matches[5]
	} else {
		// "<W>x<H>.<format>": groups 1 and 2 hold the width and height digits.
		var dims [2]int
		for i, digits := range matches[1:3] {
			n, err := strconv.Atoi(digits)
			switch {
			case errors.Is(err, strconv.ErrRange):
				// The regex guarantees digits only, so a range error means the
				// number is simply too big. Report it as such rather than as a
				// syntax problem, independent of the platform's int width.
				return Size{}, "", ErrDimensionTooLarge
			case err != nil:
				return Size{}, "", ErrInvalidSize
			case n > MaxDimension:
				return Size{}, "", ErrDimensionTooLarge
			}
			dims[i] = n
		}
		size = Size{Width: dims[0], Height: dims[1]}
		formatStr = matches[3]
	}

	format, err := parseFormat(formatStr)
	if err != nil {
		return Size{}, "", err
	}
	return size, format, nil
}
// parseFormat maps a format name from the URL to its ImageFormat.
//
// Matching is case-insensitive. "orig" and "original" select
// FormatOriginal, and "jpg" is accepted as an alias for "jpeg". Any other
// name yields an error that wraps ErrInvalidFormat and includes the name as
// it was supplied.
func parseFormat(s string) (ImageFormat, error) {
	var format ImageFormat

	switch name := strings.ToLower(s); name {
	case "jpg", "jpeg":
		format = FormatJPEG
	case "png":
		format = FormatPNG
	case "webp":
		format = FormatWebP
	case "avif":
		format = FormatAVIF
	case "gif":
		format = FormatGIF
	case "orig", "original":
		format = FormatOriginal
	default:
		return "", fmt.Errorf("%w: %s", ErrInvalidFormat, s)
	}

	return format, nil
}
// ToImageRequest builds a new ImageRequest from the parsed URL, copying
// Host, Path and Query into the request's Source* fields and carrying Size
// and Format over unchanged.
func (p *ParsedURL) ToImageRequest() *ImageRequest {
	req := new(ImageRequest)

	req.SourceHost = p.Host
	req.SourcePath = p.Path
	req.SourceQuery = p.Query
	req.Size = p.Size
	req.Format = p.Format

	return req
}