Add magic byte detection for image format validation

Implements format detection by checking file magic bytes for JPEG, PNG,
GIF, WebP, AVIF, and SVG. Includes validation against declared Content-Type.
This commit is contained in:
2026-01-08 03:35:29 -08:00
parent 9ff44b7e65
commit 30d63e80dc
2 changed files with 724 additions and 0 deletions

227
internal/imgcache/magic.go Normal file
View File

@@ -0,0 +1,227 @@
package imgcache
import (
"bytes"
"errors"
"io"
"strings"
)
// Sentinel errors returned by the magic byte detection and validation
// helpers in this file.
var (
// ErrUnknownFormat is returned by DetectFormat when the data matches none
// of the supported signatures.
ErrUnknownFormat = errors.New("unknown image format")
// ErrMagicByteMismatch is returned by ValidateMagicBytes when the detected
// format differs from the declared Content-Type.
ErrMagicByteMismatch = errors.New("content does not match declared Content-Type")
// ErrNotEnoughData is returned by DetectFormat when fewer than
// MinMagicBytes bytes are supplied.
ErrNotEnoughData = errors.New("not enough data to detect format")
)
// MIMEType is the MIME type of an input image format handled by this package.
type MIMEType string

// MIME types of the supported input formats.
const (
	MIMETypeJPEG MIMEType = "image/jpeg"
	MIMETypePNG  MIMEType = "image/png"
	MIMETypeWebP MIMEType = "image/webp"
	MIMETypeGIF  MIMEType = "image/gif"
	MIMETypeAVIF MIMEType = "image/avif"
	MIMETypeSVG  MIMEType = "image/svg+xml"
)
// MinMagicBytes is the minimum number of bytes DetectFormat needs to detect a
// format; shorter input is rejected with ErrNotEnoughData. The WebP and AVIF
// checks inspect bytes 8 through 11, which is why the minimum is 12.
const MinMagicBytes = 12
// Leading signatures of the supported binary formats.
// They are never modified; Go simply has no constant slices.
//
//nolint:gochecknoglobals // immutable lookup data
var (
	// FF D8 FF
	magicJPEG = []byte("\xFF\xD8\xFF")
	// 89 50 4E 47 0D 0A 1A 0A
	magicPNG = []byte("\x89PNG\r\n\x1a\n")
	// "GIF8" covers both GIF87a and GIF89a.
	magicGIF = []byte("GIF8")
	// "RIFF" container header; a WebP file is RIFF....WEBP.
	magicWebP = []byte("RIFF")
	// AVIF starts with an ftyp box whose brand is "avif" or "avis".
	// Layout: size (4 bytes) + "ftyp" + brand (4 bytes).
	magicFtyp = []byte("ftyp")
)
// webpIdent is the "WEBP" tag found at offset 8, right after the RIFF header.
//
//nolint:gochecknoglobals // immutable lookup data
var webpIdent = []byte("WEBP")
// Brands accepted in the ftyp box of an AVIF file.
//
//nolint:gochecknoglobals // immutable lookup data
var (
	// avifBrand marks a still AVIF image.
	avifBrand = []byte("avif")
	// avisBrand marks an AVIF image sequence.
	avisBrand = []byte("avis")
)
// DetectFormat identifies the image format of data from its leading bytes.
//
// Signatures are tried in a fixed order: JPEG, PNG, GIF, WebP, AVIF, then the
// text-based SVG heuristics. It returns ErrNotEnoughData when data is shorter
// than MinMagicBytes and ErrUnknownFormat when nothing matches; the returned
// MIME type is empty in both cases.
func DetectFormat(data []byte) (MIMEType, error) {
	if len(data) < MinMagicBytes {
		return "", ErrNotEnoughData
	}

	switch {
	case bytes.HasPrefix(data, magicJPEG):
		// FF D8 FF
		return MIMETypeJPEG, nil

	case bytes.HasPrefix(data, magicPNG):
		// 89 50 4E 47 0D 0A 1A 0A
		return MIMETypePNG, nil

	case bytes.HasPrefix(data, magicGIF):
		// GIF87a or GIF89a
		return MIMETypeGIF, nil

	case len(data) >= 12 &&
		bytes.HasPrefix(data, magicWebP) &&
		bytes.Equal(data[8:12], webpIdent):
		// RIFF....WEBP; a RIFF container of any other kind falls through.
		return MIMETypeWebP, nil

	case len(data) >= 12 &&
		bytes.Equal(data[4:8], magicFtyp) &&
		(bytes.Equal(data[8:12], avifBrand) || bytes.Equal(data[8:12], avisBrand)):
		// ....ftypavif or ....ftypavis: "ftyp" sits at offset 4, after the
		// box size, and is followed by the brand.
		return MIMETypeAVIF, nil

	case detectSVG(data):
		// XML declaration, <svg tag, or SVG doctype.
		return MIMETypeSVG, nil
	}

	return "", ErrUnknownFormat
}
// detectSVG reports whether data looks like SVG content.
//
// After dropping a UTF-8 BOM and surrounding whitespace, the content must
// start, case-insensitively, with an XML declaration ("<?xml"), an "<svg"
// tag, or "<!doctype svg".
//
// Only the candidate prefix is examined, so the cost does not grow with the
// size of data and the payload is never copied.
//
// NOTE(review): "<?xml" is accepted on its own, so any XML document with a
// declaration is reported as SVG; the root element is not inspected.
func detectSVG(data []byte) bool {
	content := bytes.TrimSpace(skipBOM(data))

	for _, prefix := range [...]string{"<?xml", "<svg", "<!doctype svg"} {
		if len(content) < len(prefix) {
			continue
		}
		// The prefixes are pure ASCII, so folding an equally long byte window
		// is the same as lowercasing the content and comparing prefixes.
		if strings.EqualFold(string(content[:len(prefix)]), prefix) {
			return true
		}
	}

	return false
}
// skipBOM returns data without a leading UTF-8 byte order mark (EF BB BF).
// Input that does not start with the mark is returned unchanged. The result
// shares its backing array with data.
func skipBOM(data []byte) []byte {
	return bytes.TrimPrefix(data, []byte("\xEF\xBB\xBF"))
}
// ValidateMagicBytes checks that the format detected from data agrees with
// declaredType.
//
// declaredType may carry parameters and any letter case; it is normalized
// before the comparison. Errors from DetectFormat are returned unchanged, and
// ErrMagicByteMismatch is returned when the detected and declared types
// differ.
func ValidateMagicBytes(data []byte, declaredType string) error {
	actual, err := DetectFormat(data)
	if err != nil {
		return err
	}

	// Compare on the bare media type only, e.g. without "; charset=...".
	if declared := MIMEType(normalizeMIMEType(declaredType)); declared != actual {
		return ErrMagicByteMismatch
	}

	return nil
}
// normalizeMIMEType reduces a Content-Type value to its bare media type:
// everything from the first ';' onward is dropped, and the remainder is
// lowercased and stripped of surrounding whitespace.
// For example, "image/JPEG; charset=utf-8" becomes "image/jpeg".
func normalizeMIMEType(mimeType string) string {
	mediaType := mimeType
	if semi := strings.IndexByte(mimeType, ';'); semi >= 0 {
		mediaType = mimeType[:semi]
	}

	lowered := strings.ToLower(mediaType)

	return strings.TrimSpace(lowered)
}
// IsSupportedMIMEType checks if a MIME type is supported for input.
func IsSupportedMIMEType(mimeType string) bool {
normalized := normalizeMIMEType(mimeType)
switch MIMEType(normalized) {
case MIMETypeJPEG, MIMETypePNG, MIMETypeWebP, MIMETypeGIF, MIMETypeAVIF, MIMETypeSVG:
return true
default:
return false
}
}
// PeekAndValidate reads the minimum bytes needed for format detection,
// validates against the declared type, and returns a reader that includes
// those bytes for subsequent reading.
func PeekAndValidate(r io.Reader, declaredType string) (io.Reader, error) {
// Read minimum bytes for detection
buf := make([]byte, MinMagicBytes)
n, err := io.ReadFull(r, buf)
if err != nil && err != io.ErrUnexpectedEOF {
return nil, err
}
buf = buf[:n]
// Validate magic bytes
if err := ValidateMagicBytes(buf, declaredType); err != nil {
return nil, err
}
// Return a reader that includes the peeked bytes
return io.MultiReader(bytes.NewReader(buf), r), nil
}
// MIMEToImageFormat maps a MIME type string to the matching ImageFormat.
//
// mimeType is normalized first, so parameters and letter case are ignored.
// The boolean is false, and the format empty, for any type without a mapping;
// that includes image/svg+xml, which has no case here.
func MIMEToImageFormat(mimeType string) (ImageFormat, bool) {
	var format ImageFormat

	switch MIMEType(normalizeMIMEType(mimeType)) {
	case MIMETypeJPEG:
		format = FormatJPEG
	case MIMETypePNG:
		format = FormatPNG
	case MIMETypeWebP:
		format = FormatWebP
	case MIMETypeGIF:
		format = FormatGIF
	case MIMETypeAVIF:
		format = FormatAVIF
	default:
		return "", false
	}

	return format, true
}
// ImageFormatToMIME returns the MIME type string for format.
// Formats without a mapping yield "application/octet-stream".
func ImageFormatToMIME(format ImageFormat) string {
	// Start from the generic fallback and refine it for known formats.
	mime := MIMEType("application/octet-stream")

	switch format {
	case FormatJPEG:
		mime = MIMETypeJPEG
	case FormatPNG:
		mime = MIMETypePNG
	case FormatWebP:
		mime = MIMETypeWebP
	case FormatGIF:
		mime = MIMETypeGIF
	case FormatAVIF:
		mime = MIMETypeAVIF
	}

	return string(mime)
}