Files
pixa/internal/imgcache/imgcache.go
clawbot 55a609dd77
All checks were successful
check / check (push) Successful in 4s
Bound imageprocessor.Process input read to prevent unbounded memory use (#37)
closes #31

## Problem

`ImageProcessor.Process` used `io.ReadAll(input)` without any size limit, allowing arbitrarily large inputs to exhaust all available memory. This is a DoS vector — even though the upstream fetcher has a `MaxResponseSize` limit (50 MiB), the processor interface accepts any `io.Reader` and should defend itself independently.

Additionally, the service layer's `processFromSourceOrFetch` read cached source content with `io.ReadAll` without a bound, so an unexpectedly large cached file could also cause unbounded memory consumption.

## Changes

### Processor (`processor.go`)
- Added `maxInputBytes` field to `ImageProcessor` (configurable, defaults to 50 MiB via `DefaultMaxInputBytes`)
- `NewImageProcessor` now accepts a `maxInputBytes` parameter (a zero or negative value selects the default)
- `Process` now wraps the input reader with `io.LimitReader` and rejects inputs exceeding the limit with `ErrInputDataTooLarge`
- Added the exported constant `DefaultMaxInputBytes` and the exported error `ErrInputDataTooLarge`

### Service (`service.go`)
- `NewService` now wires the fetcher's `MaxResponseSize` through to the processor
- Extracted `loadCachedSource` helper method to flatten nesting in `processFromSourceOrFetch`
- Cached source reads are now bounded by `maxResponseSize` — oversized cached files are discarded and re-fetched

### Tests (`processor_test.go`)
- `TestImageProcessor_RejectsOversizedInputData` — verifies that inputs exceeding `maxInputBytes` are rejected with `ErrInputDataTooLarge`
- `TestImageProcessor_AcceptsInputWithinLimit` — verifies that inputs within the limit are processed normally
- `TestImageProcessor_DefaultMaxInputBytes` — verifies that 0 and negative values use the default
- All existing tests updated to use `NewImageProcessor(0)` (default limit)

Co-authored-by: user <user@Mac.lan guest wan>
Co-authored-by: clawbot <clawbot@eeqj.de>
Reviewed-on: #37
Co-authored-by: clawbot <clawbot@noreply.example.org>
Co-committed-by: clawbot <clawbot@noreply.example.org>
2026-03-20 07:01:15 +01:00

213 lines
6.2 KiB
Go

// Package imgcache provides interfaces and types for the image caching proxy.
package imgcache
import (
"context"
"errors"
"io"
"net/url"
"time"
)
// ErrInvalidFitMode is returned when an invalid fit mode is provided.
// ValidateFitMode returns this exact value for any fit mode it does not
// recognize, so callers can test for it with errors.Is.
var ErrInvalidFitMode = errors.New("invalid fit mode")
// ImageFormat represents supported output image formats.
// It is a plain string type; the recognized values are the Format* constants.
type ImageFormat string
// Supported image output formats.
const (
// FormatOriginal is the "orig" format value.
// NOTE(review): presumably this means "keep the source image's format" — confirm against the processor.
FormatOriginal ImageFormat = "orig"
// FormatJPEG is the "jpeg" format value.
FormatJPEG ImageFormat = "jpeg"
// FormatPNG is the "png" format value.
FormatPNG ImageFormat = "png"
// FormatWebP is the "webp" format value.
FormatWebP ImageFormat = "webp"
// FormatAVIF is the "avif" format value.
FormatAVIF ImageFormat = "avif"
// FormatGIF is the "gif" format value.
FormatGIF ImageFormat = "gif"
)
// Size holds the requested output dimensions of an image.
type Size struct {
	// Width is the requested width. When both Width and Height are zero the
	// size stands for "keep original size" (see OriginalSize).
	Width int
	// Height is the requested height. When both Width and Height are zero the
	// size stands for "keep original size" (see OriginalSize).
	Height int
}

// OriginalSize reports whether this size represents "keep original size",
// which is the case exactly when neither dimension has been set (both are
// zero). A size with only one dimension set is not an original size.
func (s Size) OriginalSize() bool {
	widthUnset := s.Width == 0
	heightUnset := s.Height == 0

	return widthUnset && heightUnset
}
// FitMode names a strategy for fitting an image into the requested dimensions.
type FitMode string

// Recognized fit modes.
const (
	FitCover   FitMode = "cover"
	FitContain FitMode = "contain"
	FitFill    FitMode = "fill"
	FitInside  FitMode = "inside"
	FitOutside FitMode = "outside"
)

// ValidateFitMode reports whether fit is an accepted fit mode.
//
// It returns nil for each of the Fit* constants and for the empty string,
// and ErrInvalidFitMode for every other value. Matching is exact, so values
// that differ in case or carry surrounding whitespace are rejected.
func ValidateFitMode(fit FitMode) error {
	// The empty string is accepted alongside the named modes.
	if fit == "" {
		return nil
	}

	switch fit {
	case FitCover, FitContain, FitFill, FitInside, FitOutside:
		return nil
	}

	return ErrInvalidFitMode
}
// ImageRequest describes a single request for a processed image: where the
// source lives, how the output should look, and the data used to authorize
// the request.
type ImageRequest struct {
	// SourceHost is the origin host (e.g., "cdn.example.com").
	SourceHost string
	// SourcePath is the path on the origin (e.g., "/photos/cat.jpg").
	SourcePath string
	// SourceQuery is the optional query string for the origin URL, without
	// the leading "?".
	SourceQuery string

	// Size is the requested output dimensions.
	Size Size
	// Format is the requested output format.
	Format ImageFormat
	// Quality is the output quality (1-100) for lossy formats.
	Quality int
	// FitMode is how to fit the image into the requested dimensions.
	FitMode FitMode

	// Signature is the HMAC signature for non-whitelisted hosts.
	Signature string
	// Expires is the signature expiration timestamp.
	Expires time.Time

	// AllowHTTP indicates whether HTTP (non-TLS) is allowed for this request.
	AllowHTTP bool
}

// SourceURL assembles the full upstream URL to fetch.
//
// The scheme is "http" when AllowHTTP is set and "https" otherwise. Host,
// path and query are concatenated verbatim — no escaping or validation is
// performed here — and the "?" separator is added only when SourceQuery is
// non-empty.
func (r *ImageRequest) SourceURL() string {
	rawURL := "https://"
	if r.AllowHTTP {
		rawURL = "http://"
	}
	rawURL += r.SourceHost + r.SourcePath

	if r.SourceQuery == "" {
		return rawURL
	}
	return rawURL + "?" + r.SourceQuery
}
// ImageResponse represents a processed image ready to serve.
// It pairs the image data stream with the metadata that describes it:
// length, MIME type, ETag / last-modified time, and cache status.
type ImageResponse struct {
// Content is the image data reader
// NOTE(review): as an io.ReadCloser it presumably must be closed by whoever
// consumes the response — confirm ownership with the callers.
Content io.ReadCloser
// ContentLength is the size in bytes (-1 if unknown)
ContentLength int64
// ContentType is the MIME type of the response
ContentType string
// ETag is the entity tag for caching
ETag string
// LastModified is when the content was last modified
LastModified time.Time
// CacheStatus indicates HIT, MISS, or STALE
// (one of the CacheHit, CacheMiss, CacheStale constants).
CacheStatus CacheStatus
// FetchedBytes is the number of bytes fetched from upstream (0 if cache hit)
FetchedBytes int64
}
// CacheStatus indicates whether the response was served from cache.
// It is a string type whose recognized values are the constants below.
type CacheStatus string
// Cache status values for response headers.
const (
// CacheHit is the "HIT" status value.
CacheHit CacheStatus = "HIT"
// CacheMiss is the "MISS" status value.
CacheMiss CacheStatus = "MISS"
// CacheStale is the "STALE" status value.
// NOTE(review): the staleness policy is not visible in this file — confirm
// with the cache implementation.
CacheStale CacheStatus = "STALE"
)
// ImageCache is the main interface for the image caching proxy.
// Get, Warm and Purge each take a context and identify the image by an
// *ImageRequest; Stats takes only the context.
type ImageCache interface {
// Get retrieves a processed image, fetching and processing if necessary.
// It returns the response describing the image, or an error.
Get(ctx context.Context, req *ImageRequest) (*ImageResponse, error)
// Warm pre-fetches and caches an image without returning it.
// Only an error is reported to the caller.
Warm(ctx context.Context, req *ImageRequest) error
// Purge removes a cached image
// NOTE(review): behavior for a request that is not cached is not specified
// here — confirm with the implementation.
Purge(ctx context.Context, req *ImageRequest) error
// Stats returns cache statistics
Stats(ctx context.Context) (*CacheStats, error)
}
// CacheStats contains cache statistics.
// It is the value type returned (by pointer) from ImageCache.Stats.
type CacheStats struct {
// TotalItems is the number of cached items
TotalItems int64
// TotalSizeBytes is the total size of cached content
TotalSizeBytes int64
// HitCount is the number of cache hits
HitCount int64
// MissCount is the number of cache misses
MissCount int64
// HitRate is HitCount / (HitCount + MissCount)
// NOTE(review): the value when both counters are zero is not defined in this
// file — confirm with whatever populates the struct.
HitRate float64
}
// SignatureValidator validates request signatures.
// It both checks the signature carried by an ImageRequest and produces the
// signature string for a request.
type SignatureValidator interface {
// Validate checks if the signature is valid for the request.
// It returns an error when validation does not succeed.
// NOTE(review): presumably the Expires field is checked as well — confirm
// with the implementation.
Validate(req *ImageRequest) error
// Generate creates a signature for a request and returns it as a string.
Generate(req *ImageRequest) string
}
// Whitelist checks if a URL is whitelisted (no signature required).
type Whitelist interface {
// IsWhitelisted returns true if the URL doesn't require a signature.
// The URL is passed in parsed form as a *url.URL.
// NOTE(review): which URL components are matched is not visible here —
// confirm with the implementation.
IsWhitelisted(u *url.URL) bool
}
// Fetcher fetches images from upstream origins.
type Fetcher interface {
// Fetch retrieves an image from the origin.
// The url argument is the upstream URL as a string; the result carries the
// response body and metadata in a *FetchResult.
// NOTE(review): presumably the caller is responsible for closing
// FetchResult.Content — confirm with the implementation.
Fetch(ctx context.Context, url string) (*FetchResult, error)
}
// FetchResult contains the result of fetching from upstream.
// Besides the body and its basic metadata it records the upstream response
// headers, the fetch duration, and connection details (remote address,
// HTTP and TLS parameters).
type FetchResult struct {
// Content is the raw image data
// NOTE(review): as an io.ReadCloser it presumably must be closed by the
// consumer — confirm ownership with the callers.
Content io.ReadCloser
// ContentLength is the size in bytes (-1 if unknown)
ContentLength int64
// ContentType is the MIME type from upstream
ContentType string
// Headers contains all response headers from upstream
// (header name mapped to its list of values).
Headers map[string][]string
// StatusCode is the HTTP status code from upstream
StatusCode int
// FetchDurationMs is how long the fetch took in milliseconds
FetchDurationMs int64
// RemoteAddr is the IP:port of the upstream server
RemoteAddr string
// HTTPVersion is the protocol version (e.g., "1.1", "2.0")
HTTPVersion string
// TLSVersion is the TLS protocol version (e.g., "TLS 1.3")
// NOTE(review): presumably empty for plain-HTTP fetches — confirm.
TLSVersion string
// TLSCipherSuite is the negotiated cipher suite name
TLSCipherSuite string
}
// Storage handles persistent storage of cached content.
// Content is addressed by the hash string that Store returns.
type Storage interface {
// Store saves content and returns its hash.
// The content is supplied as an io.Reader.
// NOTE(review): the hash algorithm and encoding are not visible here —
// confirm with the implementation.
Store(ctx context.Context, content io.Reader) (hash string, err error)
// Load retrieves content by hash.
// NOTE(review): presumably the caller must close the returned reader —
// confirm with the implementation.
Load(ctx context.Context, hash string) (io.ReadCloser, error)
// Delete removes content by hash
Delete(ctx context.Context, hash string) error
// Exists checks if content exists for the given hash.
Exists(ctx context.Context, hash string) (bool, error)
}