Files
pixa/internal/imgcache/storage.go
user fff7789dfb
Some checks failed
check / check (push) Has been cancelled
fix: update Dockerfile to Go 1.25.4 and resolve gosec lint findings
- Update Dockerfile base image from golang:1.24-alpine to golang:1.25.4-alpine
  (pinned by sha256 digest) to match go.mod requirement of go >= 1.25.4
- Fix gosec G703 (path traversal) false positives by adding filepath.Clean()
  at call sites with nolint annotations for internally-constructed paths
- Fix gosec G704 (SSRF) false positive with nolint annotation; URL is already
  validated by validateURL() which checks scheme, resolves DNS, and blocks
  private IPs
- `make check` now passes cleanly (lint + tests)
2026-02-25 05:44:43 -08:00

505 lines
14 KiB
Go

package imgcache
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"time"
)
// Filesystem permissions and layout parameters shared by the storage types.
const (
	// StorageDirPerm is the permission mode used when storage directories
	// are created (owner rwx, group r-x, no access for others).
	StorageDirPerm = 0o750
	// StorageFilePerm is the permission mode for regular files in storage
	// (owner read/write only).
	StorageFilePerm = 0o600
	// MinHashLength is the shortest hash or key that is split into the
	// two-level <ab>/<cd> directory prefix; shorter values are placed
	// directly under the base directory.
	MinHashLength = 4
)
// ErrNotFound is the sentinel error returned by the storage Load methods
// when the requested file does not exist.
var ErrNotFound = errors.New("content not found")
// Identifier types used by the storage layer. Each one is a distinct named
// string type, so the compiler rejects passing one where another is expected.
type (
	// ContentHash is the SHA256 hash of a file's content (hex-encoded).
	ContentHash string
	// VariantKey is the SHA256 hash that identifies one specific image
	// variant (hex-encoded).
	VariantKey string
	// PathHash is the SHA256 hash of a URL path (hex-encoded).
	PathHash string
)
// ContentStorage is a content-addressable file store rooted at baseDir.
// Each file is named after the hex-encoded SHA256 hash of its content and is
// sharded into two directory levels taken from the first four characters of
// that hash.
// Files are stored at: <basedir>/<ab>/<cd>/<abcdef...sha256>
type ContentStorage struct {
// baseDir is the root directory under which all content files are stored.
baseDir string
}
// NewContentStorage returns a ContentStorage rooted at baseDir.
// The directory, including any missing parents, is created with
// StorageDirPerm; if that fails the wrapped error is returned and no storage
// is constructed.
func NewContentStorage(baseDir string) (*ContentStorage, error) {
	err := os.MkdirAll(baseDir, StorageDirPerm)
	if err != nil {
		return nil, fmt.Errorf("failed to create storage directory: %w", err)
	}

	store := new(ContentStorage)
	store.baseDir = baseDir
	return store, nil
}
// Store saves the content read from r under its SHA256 hash and returns that
// hash together with the number of bytes read.
//
// The whole of r is read into memory so the hash can be computed before
// anything is written. If a file for the hash already exists nothing is
// written. Otherwise the data goes to a temporary file in the destination
// directory, which is then renamed onto the final path; the temporary file is
// removed again if any later step fails.
func (s *ContentStorage) Store(r io.Reader) (hash ContentHash, size int64, err error) {
	payload, err := io.ReadAll(r)
	if err != nil {
		return "", 0, fmt.Errorf("failed to read content: %w", err)
	}

	digest := sha256.Sum256(payload)
	hash = ContentHash(hex.EncodeToString(digest[:]))
	size = int64(len(payload))

	// Destination: <basedir>/<ab>/<cd>/<hash>
	target := s.hashToPath(hash)

	// Identical content has already been stored; nothing to do.
	if _, statErr := os.Stat(target); statErr == nil {
		return hash, size, nil
	}

	parent := filepath.Dir(target)
	if err = os.MkdirAll(parent, StorageDirPerm); err != nil {
		return "", 0, fmt.Errorf("failed to create directory: %w", err)
	}

	// Stage the data next to its destination so the final rename stays
	// within one directory.
	staging, err := os.CreateTemp(parent, ".tmp-*")
	if err != nil {
		return "", 0, fmt.Errorf("failed to create temp file: %w", err)
	}
	stagingPath := staging.Name()

	// err is the named result, so it holds whatever the return statements
	// below set; a non-nil value means the staged file must not be left
	// behind.
	defer func() {
		if err != nil {
			_ = os.Remove(stagingPath)
		}
	}()

	_, err = staging.Write(payload)
	if err != nil {
		_ = staging.Close()
		return "", 0, fmt.Errorf("failed to write content: %w", err)
	}

	err = staging.Close()
	if err != nil {
		return "", 0, fmt.Errorf("failed to close temp file: %w", err)
	}

	//nolint:gosec // G703: paths from internal SHA256 hashes
	err = os.Rename(filepath.Clean(stagingPath), filepath.Clean(target))
	if err != nil {
		return "", 0, fmt.Errorf("failed to rename temp file: %w", err)
	}

	return hash, size, nil
}
// Load opens the stored content identified by hash for reading.
// It returns ErrNotFound when no file exists for the hash; any other failure
// to open the file is wrapped and returned. On success the caller owns the
// returned reader and is responsible for closing it.
func (s *ContentStorage) Load(hash ContentHash) (io.ReadCloser, error) {
	file, err := os.Open(s.hashToPath(hash)) //nolint:gosec // path derived from content hash
	switch {
	case err == nil:
		return file, nil
	case os.IsNotExist(err):
		return nil, ErrNotFound
	default:
		return nil, fmt.Errorf("failed to open content: %w", err)
	}
}
// LoadWithSize opens the stored content identified by hash and also reports
// the file's size in bytes as returned by Stat on the open file.
// It returns ErrNotFound when no file exists for the hash. If the file opens
// but cannot be stat'ed, it is closed again and the wrapped error is returned.
// On success the caller is responsible for closing the returned reader.
func (s *ContentStorage) LoadWithSize(hash ContentHash) (io.ReadCloser, int64, error) {
	file, err := os.Open(s.hashToPath(hash)) //nolint:gosec // path derived from content hash
	if os.IsNotExist(err) {
		return nil, 0, ErrNotFound
	}
	if err != nil {
		return nil, 0, fmt.Errorf("failed to open content: %w", err)
	}

	info, err := file.Stat()
	if err != nil {
		_ = file.Close()
		return nil, 0, fmt.Errorf("failed to stat content: %w", err)
	}

	return file, info.Size(), nil
}
// Delete removes the stored file for hash.
// A file that does not exist is not an error; any other removal failure is
// wrapped and returned.
func (s *ContentStorage) Delete(hash ContentHash) error {
	err := os.Remove(s.hashToPath(hash))
	if err == nil || os.IsNotExist(err) {
		return nil
	}
	return fmt.Errorf("failed to delete content: %w", err)
}
// Exists reports whether a file is present for hash.
// Any Stat failure, not only "does not exist", is reported as false.
func (s *ContentStorage) Exists(hash ContentHash) bool {
	if _, err := os.Stat(s.hashToPath(hash)); err != nil {
		return false
	}
	return true
}
// hashToPath maps a hash to its location on disk:
// <basedir>/<ab>/<cd>/<hash>, where <ab> and <cd> are the first and second
// pair of characters of the hash. Hashes shorter than MinHashLength are not
// sharded and map to <basedir>/<hash>.
func (s *ContentStorage) hashToPath(hash ContentHash) string {
	name := string(hash)

	segments := []string{s.baseDir}
	if len(name) >= MinHashLength {
		segments = append(segments, name[:2], name[2:4])
	}
	segments = append(segments, name)

	return filepath.Clean(filepath.Join(segments...))
}
// MetadataStorage stores one JSON metadata file per (host, path hash) pair
// beneath baseDir, grouping the files into one subdirectory per host.
// Files are stored at: <basedir>/<hostname>/<path_hash>.json
type MetadataStorage struct {
// baseDir is the root directory that holds the per-host subdirectories.
baseDir string
}
// NewMetadataStorage returns a MetadataStorage rooted at baseDir.
// The directory, including any missing parents, is created with
// StorageDirPerm; if that fails the wrapped error is returned and no storage
// is constructed.
func NewMetadataStorage(baseDir string) (*MetadataStorage, error) {
	mkdirErr := os.MkdirAll(baseDir, StorageDirPerm)
	if mkdirErr != nil {
		return nil, fmt.Errorf("failed to create metadata directory: %w", mkdirErr)
	}

	var store MetadataStorage
	store.baseDir = baseDir
	return &store, nil
}
// SourceMetadata represents cached metadata about a source URL.
// It is the value that MetadataStorage serializes to and parses from JSON.
// Fields tagged omitempty are left out of the JSON when they hold their zero
// value. The declaration order of the fields is also the order of the keys in
// the serialized output.
type SourceMetadata struct {
// Host, Path and Query are presumably the components of the source URL —
// confirm against the code that populates this struct.
Host string `json:"host"`
Path string `json:"path"`
Query string `json:"query,omitempty"`
// ContentHash is presumably the ContentStorage hash of the fetched body — TODO confirm.
ContentHash string `json:"content_hash,omitempty"`
// StatusCode, ContentType, ContentLength and ResponseHeaders presumably
// mirror the upstream HTTP response — TODO confirm.
StatusCode int `json:"status_code"`
ContentType string `json:"content_type,omitempty"`
ContentLength int64 `json:"content_length,omitempty"`
ResponseHeaders map[string][]string `json:"response_headers,omitempty"`
// FetchedAt and ExpiresAt are integer timestamps; the unit is not visible
// in this file (presumably Unix seconds — TODO confirm).
FetchedAt int64 `json:"fetched_at"`
// FetchDurationMs is, going by its name, a duration in milliseconds.
FetchDurationMs int64 `json:"fetch_duration_ms,omitempty"`
ExpiresAt int64 `json:"expires_at,omitempty"`
// ETag and LastModified presumably hold the upstream cache validators — TODO confirm.
ETag string `json:"etag,omitempty"`
LastModified string `json:"last_modified,omitempty"`
// RemoteAddr is presumably the address the source was fetched from — TODO confirm.
RemoteAddr string `json:"remote_addr,omitempty"`
}
// Store serializes meta as indented JSON and writes it to the metadata file
// for host and pathHash, creating the host directory if necessary.
//
// The data is first written to a temporary file in the destination directory
// and then renamed onto the final path, so the final file is only ever
// replaced as a whole. If writing, closing or renaming fails, the temporary
// file is removed before the wrapped error is returned.
func (s *MetadataStorage) Store(host string, pathHash PathHash, meta *SourceMetadata) (err error) {
	path := s.metaPath(host, pathHash)

	// Create directory structure
	dir := filepath.Dir(path)
	if err = os.MkdirAll(dir, StorageDirPerm); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}

	// Marshal to JSON
	data, err := json.MarshalIndent(meta, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal metadata: %w", err)
	}

	// Write to temp file first, then rename for atomicity
	tmpFile, err := os.CreateTemp(dir, ".tmp-*.json")
	if err != nil {
		return fmt.Errorf("failed to create temp file: %w", err)
	}
	tmpPath := tmpFile.Name()

	// err is the named result, so this cleanup sees the value set by every
	// return statement below. With an unnamed result the closure would only
	// observe the local err from CreateTemp (always nil at this point) and
	// the temporary file would be leaked on failure.
	defer func() {
		if err != nil {
			_ = os.Remove(tmpPath)
		}
	}()

	if _, err = tmpFile.Write(data); err != nil {
		_ = tmpFile.Close()
		return fmt.Errorf("failed to write metadata: %w", err)
	}
	if err = tmpFile.Close(); err != nil {
		return fmt.Errorf("failed to close temp file: %w", err)
	}

	// Atomic rename
	//nolint:gosec // G703: paths from internal SHA256 hashes
	if err = os.Rename(filepath.Clean(tmpPath), filepath.Clean(path)); err != nil {
		return fmt.Errorf("failed to rename temp file: %w", err)
	}
	return nil
}
// Load reads and decodes the metadata file for host and pathHash.
// It returns ErrNotFound when the file does not exist. Any other read failure
// and any JSON decoding failure are wrapped and returned.
func (s *MetadataStorage) Load(host string, pathHash PathHash) (*SourceMetadata, error) {
	raw, err := os.ReadFile(s.metaPath(host, pathHash)) //nolint:gosec // path derived from host+hash
	if os.IsNotExist(err) {
		return nil, ErrNotFound
	}
	if err != nil {
		return nil, fmt.Errorf("failed to read metadata: %w", err)
	}

	decoded := new(SourceMetadata)
	if err := json.Unmarshal(raw, decoded); err != nil {
		return nil, fmt.Errorf("failed to unmarshal metadata: %w", err)
	}
	return decoded, nil
}
// Delete removes the metadata file for host and pathHash.
// A file that does not exist is not an error; any other removal failure is
// wrapped and returned.
func (s *MetadataStorage) Delete(host string, pathHash PathHash) error {
	err := os.Remove(s.metaPath(host, pathHash))
	switch {
	case err == nil, os.IsNotExist(err):
		return nil
	default:
		return fmt.Errorf("failed to delete metadata: %w", err)
	}
}
// Exists reports whether a metadata file is present for host and pathHash.
// Any Stat failure, not only "does not exist", is reported as false.
func (s *MetadataStorage) Exists(host string, pathHash PathHash) bool {
	if _, err := os.Stat(s.metaPath(host, pathHash)); err != nil {
		return false
	}
	return true
}
// metaPath builds the location of the metadata file for host and pathHash:
// <basedir>/<host>/<path_hash>.json
//
// NOTE(review): host is used verbatim as a path element, so a value that
// contains path separators or ".." resolves outside <basedir>/<host> —
// confirm that callers only pass validated host names.
func (s *MetadataStorage) metaPath(host string, pathHash PathHash) string {
	fileName := string(pathHash) + ".json"
	joined := filepath.Join(s.baseDir, host, fileName)
	return filepath.Clean(joined)
}
// HashPath returns the lowercase hex-encoded SHA256 digest of path as a
// PathHash.
func HashPath(path string) PathHash {
	digest := sha256.Sum256([]byte(path))

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest[:])
	return PathHash(encoded)
}
// CacheKey derives the key under which a processed variant of req is cached.
// It is the hex-encoded SHA256 digest of
// host:path:query:width:height:format:quality:fit_mode
//
// NOTE(review): the fields are joined with ':' without escaping, so values
// that themselves contain ':' can yield the same pre-image for different
// requests (path "a:b" with query "c" versus path "a" with query "b:c") —
// confirm whether upstream validation rules this out.
func CacheKey(req *ImageRequest) VariantKey {
	const layout = "%s:%s:%s:%d:%d:%s:%d:%s"

	preimage := fmt.Sprintf(layout,
		req.SourceHost,
		req.SourcePath,
		req.SourceQuery,
		req.Size.Width,
		req.Size.Height,
		req.Format,
		req.Quality,
		req.FitMode,
	)
	digest := sha256.Sum256([]byte(preimage))

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest[:])
	return VariantKey(encoded)
}
// VariantStorage handles key-based file storage for processed image variants.
// The key is sharded into two directory levels taken from its first four
// characters, and each content file has a JSON sidecar next to it.
// Files are stored at: <basedir>/<ab>/<cd>/<cache_key>
// Metadata is stored at: <basedir>/<ab>/<cd>/<cache_key>.meta
// Unlike ContentStorage, the key is provided by the caller (not computed from content).
type VariantStorage struct {
// baseDir is the root directory under which all variant files are stored.
baseDir string
}
// VariantMeta contains metadata about a cached variant.
// It is serialized as JSON into the ".meta" sidecar written by
// VariantStorage.Store and read back by VariantStorage.LoadWithMeta. The
// declaration order of the fields is also the order of the keys in the
// serialized output.
type VariantMeta struct {
// ContentType is the content type the caller passed to Store.
ContentType string `json:"content_type"`
// Size is the number of content bytes that Store read.
Size int64 `json:"size"`
// CreatedAt is the time of the Store call as Unix seconds (UTC).
CreatedAt int64 `json:"created_at"`
}
// NewVariantStorage returns a VariantStorage rooted at baseDir.
// The directory, including any missing parents, is created with
// StorageDirPerm; if that fails the wrapped error is returned and no storage
// is constructed.
func NewVariantStorage(baseDir string) (*VariantStorage, error) {
	if mkdirErr := os.MkdirAll(baseDir, StorageDirPerm); mkdirErr != nil {
		return nil, fmt.Errorf("failed to create variant storage directory: %w", mkdirErr)
	}

	store := &VariantStorage{}
	store.baseDir = baseDir
	return store, nil
}
// Store reads all of r into memory and writes it to the file for key, together
// with a ".meta" sidecar recording contentType, the content size and the
// creation time (Unix seconds, UTC). It returns the number of content bytes.
//
// Both files are written to a temporary file in the destination directory and
// then renamed into place, so neither is ever visible half-written. The
// sidecar is published before the content so that a concurrent LoadWithMeta
// that finds the content also finds its content type. Failing to write the
// sidecar is deliberately not fatal: LoadWithMeta falls back to
// "application/octet-stream" when the sidecar is missing or unreadable.
func (s *VariantStorage) Store(key VariantKey, r io.Reader, contentType string) (size int64, err error) {
	data, err := io.ReadAll(r)
	if err != nil {
		return 0, fmt.Errorf("failed to read content: %w", err)
	}
	size = int64(len(data))

	path := s.keyToPath(key)
	metaPath := path + ".meta"

	// Create directory structure
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, StorageDirPerm); err != nil {
		return 0, fmt.Errorf("failed to create directory: %w", err)
	}

	// Encode the sidecar before anything is written so that an encoding
	// failure cannot leave content behind without it.
	metaData, err := json.Marshal(VariantMeta{
		ContentType: contentType,
		Size:        size,
		CreatedAt:   time.Now().UTC().Unix(),
	})
	if err != nil {
		return 0, fmt.Errorf("failed to marshal metadata: %w", err)
	}

	// Best effort: the content is still usable without its sidecar, so a
	// failure here must not fail the store.
	_ = writeVariantFileAtomic(dir, metaPath, metaData)

	if err := writeVariantFileAtomic(dir, path, data); err != nil {
		return 0, err
	}
	return size, nil
}

// writeVariantFileAtomic writes data to a temporary file in dir and renames it
// onto path, so path is only ever replaced by a completely written file. The
// temporary file is removed again if any step fails.
func writeVariantFileAtomic(dir, path string, data []byte) (err error) {
	tmpFile, err := os.CreateTemp(dir, ".tmp-*")
	if err != nil {
		return fmt.Errorf("failed to create temp file: %w", err)
	}
	tmpPath := tmpFile.Name()

	// err is the named result, so it reflects the outcome of every return
	// statement below.
	defer func() {
		if err != nil {
			_ = os.Remove(tmpPath)
		}
	}()

	if _, err = tmpFile.Write(data); err != nil {
		_ = tmpFile.Close()
		return fmt.Errorf("failed to write content: %w", err)
	}
	if err = tmpFile.Close(); err != nil {
		return fmt.Errorf("failed to close temp file: %w", err)
	}

	//nolint:gosec // G703: paths from internal SHA256 hashes
	if err = os.Rename(filepath.Clean(tmpPath), filepath.Clean(path)); err != nil {
		return fmt.Errorf("failed to rename temp file: %w", err)
	}
	return nil
}
// Load opens the variant stored at key for reading.
// It returns ErrNotFound when no file exists for the key; any other failure
// to open the file is wrapped and returned. On success the caller owns the
// returned reader and is responsible for closing it.
func (s *VariantStorage) Load(key VariantKey) (io.ReadCloser, error) {
	file, err := os.Open(s.keyToPath(key)) //nolint:gosec // path derived from cache key
	if err == nil {
		return file, nil
	}
	if os.IsNotExist(err) {
		return nil, ErrNotFound
	}
	return nil, fmt.Errorf("failed to open content: %w", err)
}
// LoadWithMeta opens the variant stored at key and returns the reader, the
// file size in bytes (from Stat on the open file) and the content type.
//
// The content type is taken from the ".meta" sidecar next to the content
// file. When the sidecar cannot be read or decoded, or records an empty
// content type, "application/octet-stream" is returned instead.
//
// It returns ErrNotFound when no content file exists for the key. If the file
// opens but cannot be stat'ed, it is closed again and the wrapped error is
// returned. On success the caller is responsible for closing the reader.
func (s *VariantStorage) LoadWithMeta(key VariantKey) (io.ReadCloser, int64, string, error) {
	contentPath := s.keyToPath(key)

	file, err := os.Open(contentPath) //nolint:gosec // path derived from cache key
	if os.IsNotExist(err) {
		return nil, 0, "", ErrNotFound
	}
	if err != nil {
		return nil, 0, "", fmt.Errorf("failed to open content: %w", err)
	}

	info, err := file.Stat()
	if err != nil {
		_ = file.Close()
		return nil, 0, "", fmt.Errorf("failed to stat content: %w", err)
	}

	// Start from the fallback and override it only with a usable sidecar.
	contentType := "application/octet-stream"
	raw, readErr := os.ReadFile(contentPath + ".meta") //nolint:gosec // path derived from cache key
	if readErr == nil {
		var sidecar VariantMeta
		if decodeErr := json.Unmarshal(raw, &sidecar); decodeErr == nil && sidecar.ContentType != "" {
			contentType = sidecar.ContentType
		}
	}

	return file, info.Size(), contentType, nil
}
// Exists reports whether a content file is present for key. Only the content
// file is checked, not its ".meta" sidecar.
// Any Stat failure, not only "does not exist", is reported as false.
func (s *VariantStorage) Exists(key VariantKey) bool {
	if _, err := os.Stat(s.keyToPath(key)); err != nil {
		return false
	}
	return true
}
// Delete removes the content stored at key together with its ".meta" sidecar.
// Files that do not exist are ignored, so deleting an unknown key succeeds.
// Any other removal failure is wrapped and returned.
func (s *VariantStorage) Delete(key VariantKey) error {
	path := s.keyToPath(key)

	// Remove the content first: if the sidecar removal below fails, the
	// variant is already gone rather than left behind without its sidecar.
	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to delete content: %w", err)
	}

	// Store writes the sidecar next to the content; without this it would
	// be orphaned on disk after every delete.
	if err := os.Remove(path + ".meta"); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to delete metadata: %w", err)
	}
	return nil
}
// keyToPath maps a key to its location on disk:
// <basedir>/<ab>/<cd>/<key>, where <ab> and <cd> are the first and second
// pair of characters of the key. Keys shorter than MinHashLength are not
// sharded and map to <basedir>/<key>.
func (s *VariantStorage) keyToPath(key VariantKey) string {
	if name := string(key); len(name) >= MinHashLength {
		return filepath.Join(s.baseDir, name[:2], name[2:4], name)
	}
	return filepath.Join(s.baseDir, string(key))
}