Files
pixa/internal/imgcache/magic.go
sneak 30d63e80dc Add magic byte detection for image format validation
Implements format detection by checking file magic bytes for JPEG, PNG,
GIF, WebP, AVIF, and SVG. Includes validation against declared Content-Type.
2026-01-08 03:35:29 -08:00

228 lines
6.0 KiB
Go

package imgcache
import (
"bytes"
"errors"
"io"
"strings"
)
// Sentinel errors reported by magic byte detection and validation.

// ErrUnknownFormat is returned by DetectFormat when the data matches none of
// the supported signatures.
var ErrUnknownFormat = errors.New("unknown image format")

// ErrMagicByteMismatch is returned by ValidateMagicBytes when the detected
// format differs from the declared Content-Type.
var ErrMagicByteMismatch = errors.New("content does not match declared Content-Type")

// ErrNotEnoughData is returned by DetectFormat when fewer than MinMagicBytes
// bytes are supplied.
var ErrNotEnoughData = errors.New("not enough data to detect format")
// MIMEType is the media type string of an image format accepted as input.
type MIMEType string

// The input MIME types recognised by this package.
const (
	MIMETypeJPEG MIMEType = "image/jpeg"
	MIMETypePNG  MIMEType = "image/png"
	MIMETypeWebP MIMEType = "image/webp"
	MIMETypeGIF  MIMEType = "image/gif"
	MIMETypeAVIF MIMEType = "image/avif"
	MIMETypeSVG  MIMEType = "image/svg+xml"
)
// MinMagicBytes is the minimum number of bytes needed to detect format.
// DetectFormat returns ErrNotEnoughData for shorter input. The value is 12
// because the WebP identifier and the AVIF brand are both read from bytes
// 8 through 11.
const MinMagicBytes = 12
// Leading-byte signatures of the supported binary formats.
// They are never modified; Go simply has no constant slices.
//
//nolint:gochecknoglobals // immutable lookup data
var (
	magicJPEG = []byte{0xFF, 0xD8, 0xFF}
	magicPNG  = []byte("\x89PNG\r\n\x1a\n")
	magicGIF  = []byte("GIF8") // common prefix of GIF87a and GIF89a
	magicWebP = []byte("RIFF") // WebP is RIFF....WEBP; see webpIdent

	// AVIF is recognised by its ftyp box:
	// size (4 bytes) + "ftyp" + brand (4 bytes, "avif" or "avis").
	magicFtyp = []byte("ftyp")
)
// webpIdent is the four-byte tag found at offset 8 of a WebP file, right
// after the RIFF header.
//
//nolint:gochecknoglobals // immutable lookup data
var webpIdent = []byte("WEBP")
// ftyp brands that identify AVIF content.
//
//nolint:gochecknoglobals // immutable lookup data
var (
	avifBrand = []byte("avif") // still image
	avisBrand = []byte("avis") // AVIF sequence
)
// DetectFormat identifies the image format of data from its leading bytes.
//
// It returns ErrNotEnoughData when data is shorter than MinMagicBytes and
// ErrUnknownFormat when no supported signature matches. The binary
// signatures (JPEG, PNG, GIF, WebP, AVIF) are tried first; the textual SVG
// check runs last.
func DetectFormat(data []byte) (MIMEType, error) {
	if len(data) < MinMagicBytes {
		return "", ErrNotEnoughData
	}

	// MinMagicBytes is 12, so bytes 4-11 are addressable from here on.
	// The ftyp box type sits at offset 4; WebP and AVIF both keep their
	// identifying tag at offset 8.
	boxType, tag := data[4:8], data[8:12]

	switch {
	case bytes.HasPrefix(data, magicJPEG): // FF D8 FF
		return MIMETypeJPEG, nil

	case bytes.HasPrefix(data, magicPNG): // 89 50 4E 47 0D 0A 1A 0A
		return MIMETypePNG, nil

	case bytes.HasPrefix(data, magicGIF): // GIF87a or GIF89a
		return MIMETypeGIF, nil

	case bytes.HasPrefix(data, magicWebP) && bytes.Equal(tag, webpIdent): // RIFF....WEBP
		return MIMETypeWebP, nil

	case bytes.Equal(boxType, magicFtyp) &&
		(bytes.Equal(tag, avifBrand) || bytes.Equal(tag, avisBrand)): // ....ftypavif / ....ftypavis
		return MIMETypeAVIF, nil

	case detectSVG(data): // XML declaration, <svg element or SVG doctype
		return MIMETypeSVG, nil
	}

	return "", ErrUnknownFormat
}
// detectSVG reports whether data looks like SVG markup.
//
// After an optional UTF-8 BOM and any leading whitespace, the content must
// begin (case-insensitively) with an XML declaration ("<?xml"), an "<svg"
// element, or a "<!doctype svg" declaration. Only that prefix is examined,
// so any document opening with an XML declaration is accepted.
func detectSVG(data []byte) bool {
	// Trimming and prefix comparison work on the original bytes; the input
	// may be an entire image body and must not be copied or lowercased in
	// full just to inspect its first few bytes.
	content := bytes.TrimSpace(skipBOM(data))

	for _, prefix := range [...]string{"<?xml", "<svg", "<!doctype svg"} {
		if len(content) < len(prefix) {
			continue
		}
		// The prefixes are pure ASCII, so folding exactly len(prefix) bytes
		// can only succeed on an ASCII, case-insensitive match.
		if bytes.EqualFold(content[:len(prefix)], []byte(prefix)) {
			return true
		}
	}

	return false
}

// skipBOM returns data without a leading UTF-8 byte order mark (EF BB BF).
// Input that does not start with the mark is returned unchanged.
func skipBOM(data []byte) []byte {
	const utf8BOM = "\xEF\xBB\xBF"

	if len(data) >= len(utf8BOM) && string(data[:len(utf8BOM)]) == utf8BOM {
		return data[len(utf8BOM):]
	}

	return data
}
// ValidateMagicBytes checks that the format detected from data agrees with
// declaredType.
//
// declaredType may carry parameters (for example "; charset=utf-8") and any
// letter case; it is normalized before comparison. Errors from DetectFormat
// are returned unchanged, and a disagreement yields ErrMagicByteMismatch.
func ValidateMagicBytes(data []byte, declaredType string) error {
	detected, err := DetectFormat(data)

	switch {
	case err != nil:
		return err
	case MIMEType(normalizeMIMEType(declaredType)) != detected:
		return ErrMagicByteMismatch
	}

	return nil
}
// normalizeMIMEType reduces a Content-Type style value to its bare media
// type: everything from the first ';' onward is dropped, and the remainder
// is lowercased and stripped of surrounding whitespace.
// For example "image/jpeg; charset=utf-8" becomes "image/jpeg".
func normalizeMIMEType(mimeType string) string {
	// With a limit of 2, element 0 is the text before the first ';', or the
	// whole input when there is none.
	mediaType := strings.SplitN(mimeType, ";", 2)[0]
	lowered := strings.ToLower(mediaType)

	return strings.TrimSpace(lowered)
}
// IsSupportedMIMEType checks if a MIME type is supported for input.
func IsSupportedMIMEType(mimeType string) bool {
normalized := normalizeMIMEType(mimeType)
switch MIMEType(normalized) {
case MIMETypeJPEG, MIMETypePNG, MIMETypeWebP, MIMETypeGIF, MIMETypeAVIF, MIMETypeSVG:
return true
default:
return false
}
}
// PeekAndValidate reads up to MinMagicBytes from r, validates them against
// declaredType with ValidateMagicBytes, and on success returns a reader that
// yields the consumed bytes followed by the remainder of r.
//
// A stream that ends after at least one byte but before MinMagicBytes is
// passed to validation as-is, which reports ErrNotEnoughData. Any other read
// error is returned unchanged; this includes io.EOF, which io.ReadFull
// returns when r yields no bytes at all. On error the returned reader is nil
// and the bytes already read from r are not recoverable through this
// function.
func PeekAndValidate(r io.Reader, declaredType string) (io.Reader, error) {
	head := make([]byte, MinMagicBytes)

	n, err := io.ReadFull(r, head)
	// A short read is not an I/O failure here. errors.Is (rather than !=)
	// keeps that true when a wrapping reader decorates the sentinel.
	if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) {
		return nil, err
	}

	head = head[:n]

	if err := ValidateMagicBytes(head, declaredType); err != nil {
		return nil, err
	}

	// Replay the peeked bytes ahead of whatever is still unread in r.
	return io.MultiReader(bytes.NewReader(head), r), nil
}
// MIMEToImageFormat maps a MIME type string to the matching ImageFormat.
//
// The input is normalized first, so parameters and letter case are ignored.
// The boolean is false, and the format empty, when there is no mapping.
// Only JPEG, PNG, WebP, GIF and AVIF are mapped; image/svg+xml has no case
// here and therefore yields ("", false).
func MIMEToImageFormat(mimeType string) (ImageFormat, bool) {
	var format ImageFormat

	switch MIMEType(normalizeMIMEType(mimeType)) {
	case MIMETypeJPEG:
		format = FormatJPEG
	case MIMETypePNG:
		format = FormatPNG
	case MIMETypeWebP:
		format = FormatWebP
	case MIMETypeGIF:
		format = FormatGIF
	case MIMETypeAVIF:
		format = FormatAVIF
	default:
		return "", false
	}

	return format, true
}
// ImageFormatToMIME returns the MIME type string for format.
//
// JPEG, PNG, WebP, GIF and AVIF map to their image/* types; any other value
// yields "application/octet-stream".
func ImageFormatToMIME(format ImageFormat) string {
	var mime MIMEType

	switch format {
	case FormatJPEG:
		mime = MIMETypeJPEG
	case FormatPNG:
		mime = MIMETypePNG
	case FormatWebP:
		mime = MIMETypeWebP
	case FormatGIF:
		mime = MIMETypeGIF
	case FormatAVIF:
		mime = MIMETypeAVIF
	default:
		return "application/octet-stream"
	}

	return string(mime)
}