Files
pixa/internal/imgcache/storage.go
sneak be293906bc Add type-safe hash types for cache storage
Define ContentHash, VariantKey, and PathHash types to replace
raw strings, providing compile-time type safety for storage
operations. Update storage layer to use typed parameters,
refactor cache to use variant storage keyed by VariantKey,
and implement source content reuse on cache misses.
2026-01-08 16:55:20 -08:00

500 lines
13 KiB
Go

package imgcache
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"time"
)
// Storage constants.
const (
	// StorageDirPerm is the permission mode passed to os.MkdirAll for every
	// directory the storage types create (rwxr-x---).
	StorageDirPerm = 0o750

	// MinHashLength is the shortest hash or key that gets the two-level
	// <ab>/<cd> directory prefix; anything shorter is placed directly under
	// the base directory.
	MinHashLength = 4
)
// ErrNotFound is returned by the Load methods of the storage types when the
// requested content or metadata file does not exist on disk.
var ErrNotFound = errors.New("content not found")
// Hex-encoded SHA256 digests. Each role gets its own string type so the
// compiler rejects passing one kind of hash where another is expected.
type (
	// ContentHash is a SHA256 hash of file content (hex-encoded).
	ContentHash string

	// VariantKey is a SHA256 hash identifying a specific image variant (hex-encoded).
	VariantKey string

	// PathHash is a SHA256 hash of a URL path (hex-encoded).
	PathHash string
)
// ContentStorage handles content-addressable file storage.
// Files are stored at: <basedir>/<ab>/<cd>/<abcdef...sha256>
// where <ab> and <cd> are the first and second pairs of characters of the
// hex-encoded hash (see hashToPath).
type ContentStorage struct {
// baseDir is the root directory under which all content files are placed.
baseDir string
}
// NewContentStorage returns a ContentStorage rooted at baseDir. The directory
// (and any missing parents) is created with StorageDirPerm; a failure to do so
// is returned wrapped and no storage is constructed.
func NewContentStorage(baseDir string) (*ContentStorage, error) {
	mkErr := os.MkdirAll(baseDir, StorageDirPerm)
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create storage directory: %w", mkErr)
	}

	storage := &ContentStorage{baseDir: baseDir}

	return storage, nil
}
// Store persists everything read from r and returns the hex-encoded SHA256
// hash of the content together with its length in bytes.
//
// The file name is the hash, so the whole payload is buffered in memory before
// anything is written. If a file for that hash already exists the call returns
// immediately without rewriting it. New content is written to a temporary file
// in the destination directory and renamed into place, so readers never see a
// partially written file.
func (s *ContentStorage) Store(r io.Reader) (hash ContentHash, size int64, err error) {
	payload, err := io.ReadAll(r)
	if err != nil {
		return "", 0, fmt.Errorf("failed to read content: %w", err)
	}

	digest := sha256.Sum256(payload)
	hash = ContentHash(hex.EncodeToString(digest[:]))
	size = int64(len(payload))

	// Destination: <basedir>/<ab>/<cd>/<hash>
	target := s.hashToPath(hash)
	if _, statErr := os.Stat(target); statErr == nil {
		// Identical content is already stored; nothing to write.
		return hash, size, nil
	}

	parent := filepath.Dir(target)
	if err = os.MkdirAll(parent, StorageDirPerm); err != nil {
		return "", 0, fmt.Errorf("failed to create directory: %w", err)
	}

	staged, err := os.CreateTemp(parent, ".tmp-*")
	if err != nil {
		return "", 0, fmt.Errorf("failed to create temp file: %w", err)
	}
	stagedPath := staged.Name()

	// err is the named result, so this sees whatever error the function
	// finally returns and removes the staged file on any failure below.
	defer func() {
		if err != nil {
			_ = os.Remove(stagedPath)
		}
	}()

	if _, err = staged.Write(payload); err != nil {
		_ = staged.Close()
		return "", 0, fmt.Errorf("failed to write content: %w", err)
	}
	if err = staged.Close(); err != nil {
		return "", 0, fmt.Errorf("failed to close temp file: %w", err)
	}

	// Publish atomically.
	if err = os.Rename(stagedPath, target); err != nil {
		return "", 0, fmt.Errorf("failed to rename temp file: %w", err)
	}

	return hash, size, nil
}
// Load opens the content stored under hash and returns it as a reader that the
// caller must close. It returns ErrNotFound when no file exists for the hash;
// any other open failure is returned wrapped.
func (s *ContentStorage) Load(hash ContentHash) (io.ReadCloser, error) {
	file, openErr := os.Open(s.hashToPath(hash)) //nolint:gosec // path derived from content hash
	switch {
	case openErr == nil:
		return file, nil
	case os.IsNotExist(openErr):
		return nil, ErrNotFound
	default:
		return nil, fmt.Errorf("failed to open content: %w", openErr)
	}
}
// LoadWithSize opens the content stored under hash and returns the reader
// (which the caller must close) along with the file size in bytes as reported
// by Stat on the open file. It returns ErrNotFound when no file exists for the
// hash. If the size cannot be determined the file is closed and an error is
// returned.
func (s *ContentStorage) LoadWithSize(hash ContentHash) (io.ReadCloser, int64, error) {
	file, openErr := os.Open(s.hashToPath(hash)) //nolint:gosec // path derived from content hash
	if os.IsNotExist(openErr) {
		return nil, 0, ErrNotFound
	}
	if openErr != nil {
		return nil, 0, fmt.Errorf("failed to open content: %w", openErr)
	}

	info, statErr := file.Stat()
	if statErr != nil {
		_ = file.Close()
		return nil, 0, fmt.Errorf("failed to stat content: %w", statErr)
	}

	return file, info.Size(), nil
}
// Delete removes the file stored under hash. Deleting content that does not
// exist is not an error; any other removal failure is returned wrapped.
func (s *ContentStorage) Delete(hash ContentHash) error {
	rmErr := os.Remove(s.hashToPath(hash))
	if rmErr == nil || os.IsNotExist(rmErr) {
		return nil
	}
	return fmt.Errorf("failed to delete content: %w", rmErr)
}
// Exists reports whether a file for hash can be stat'ed. Any Stat error, not
// only "does not exist", yields false.
func (s *ContentStorage) Exists(hash ContentHash) bool {
	if _, statErr := os.Stat(s.hashToPath(hash)); statErr != nil {
		return false
	}
	return true
}
// hashToPath maps a hash to its on-disk location,
// <basedir>/<ab>/<cd>/<hash>, where <ab> and <cd> are the first two character
// pairs of the hash. Hashes shorter than MinHashLength cannot be split and are
// placed directly under the base directory.
func (s *ContentStorage) hashToPath(hash ContentHash) string {
	name := string(hash)

	parts := []string{s.baseDir}
	if len(name) >= MinHashLength {
		parts = append(parts, name[:2], name[2:4])
	}
	parts = append(parts, name)

	return filepath.Join(parts...)
}
// MetadataStorage handles JSON metadata file storage.
// Files are stored at: <basedir>/<hostname>/<path_hash>.json
type MetadataStorage struct {
// baseDir is the root directory; Store creates one subdirectory per host
// beneath it as needed.
baseDir string
}
// NewMetadataStorage returns a MetadataStorage rooted at baseDir. The
// directory (and any missing parents) is created with StorageDirPerm; a
// failure to do so is returned wrapped and no storage is constructed.
func NewMetadataStorage(baseDir string) (*MetadataStorage, error) {
	mkErr := os.MkdirAll(baseDir, StorageDirPerm)
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create metadata directory: %w", mkErr)
	}

	storage := &MetadataStorage{baseDir: baseDir}

	return storage, nil
}
// SourceMetadata represents cached metadata about a source URL.
// It is the JSON document that MetadataStorage.Store writes and
// MetadataStorage.Load reads back.
//
// NOTE(review): the code that populates these fields is not in this file; the
// per-field notes below are inferred from names and JSON tags and should be
// confirmed against the fetcher.
type SourceMetadata struct {
// Host, Path and Query presumably identify the source URL that was fetched.
Host string `json:"host"`
Path string `json:"path"`
Query string `json:"query,omitempty"`
// ContentHash is a plain string here, not the ContentHash type; presumably
// it holds the hash returned by ContentStorage.Store — TODO confirm.
ContentHash string `json:"content_hash,omitempty"`
// StatusCode, ContentType, ContentLength and ResponseHeaders presumably
// mirror the upstream HTTP response.
StatusCode int `json:"status_code"`
ContentType string `json:"content_type,omitempty"`
ContentLength int64 `json:"content_length,omitempty"`
ResponseHeaders map[string][]string `json:"response_headers,omitempty"`
// FetchedAt and ExpiresAt are integer timestamps; the unit (presumably Unix
// seconds) is not established in this file — TODO confirm.
FetchedAt int64 `json:"fetched_at"`
// FetchDurationMs is presumably the fetch duration in milliseconds, per its name.
FetchDurationMs int64 `json:"fetch_duration_ms,omitempty"`
ExpiresAt int64 `json:"expires_at,omitempty"`
// ETag and LastModified presumably carry the upstream validators used for
// conditional requests.
ETag string `json:"etag,omitempty"`
LastModified string `json:"last_modified,omitempty"`
// RemoteAddr is presumably the address the source was fetched from.
RemoteAddr string `json:"remote_addr,omitempty"`
}
// Store serializes meta as indented JSON and writes it to
// <basedir>/<host>/<path_hash>.json, creating the host directory if needed.
//
// The document is written to a temporary file in the destination directory
// and renamed into place, so readers never observe a partially written file.
// If writing, closing or renaming fails, the temporary file is removed.
//
// The result is named so the deferred cleanup can observe the error actually
// being returned; with an unnamed result the cleanup would only ever see the
// (nil) error from os.CreateTemp and leak the temp file on every later failure.
func (s *MetadataStorage) Store(host string, pathHash PathHash, meta *SourceMetadata) (err error) {
	path := s.metaPath(host, pathHash)

	// Create directory structure
	dir := filepath.Dir(path)
	if err = os.MkdirAll(dir, StorageDirPerm); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}

	// Marshal to JSON
	data, err := json.MarshalIndent(meta, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal metadata: %w", err)
	}

	// Write to temp file first, then rename for atomicity
	tmpFile, err := os.CreateTemp(dir, ".tmp-*.json")
	if err != nil {
		return fmt.Errorf("failed to create temp file: %w", err)
	}
	tmpPath := tmpFile.Name()

	// Runs after the return value has been assigned to err.
	defer func() {
		if err != nil {
			_ = os.Remove(tmpPath)
		}
	}()

	if _, err = tmpFile.Write(data); err != nil {
		_ = tmpFile.Close()
		return fmt.Errorf("failed to write metadata: %w", err)
	}
	if err = tmpFile.Close(); err != nil {
		return fmt.Errorf("failed to close temp file: %w", err)
	}

	// Atomic rename
	if err = os.Rename(tmpPath, path); err != nil {
		return fmt.Errorf("failed to rename temp file: %w", err)
	}

	return nil
}
// Load reads and decodes the metadata document stored for host and pathHash.
// It returns ErrNotFound when the file does not exist; read and JSON decoding
// failures are returned wrapped.
func (s *MetadataStorage) Load(host string, pathHash PathHash) (*SourceMetadata, error) {
	raw, readErr := os.ReadFile(s.metaPath(host, pathHash)) //nolint:gosec // path derived from host+hash
	if os.IsNotExist(readErr) {
		return nil, ErrNotFound
	}
	if readErr != nil {
		return nil, fmt.Errorf("failed to read metadata: %w", readErr)
	}

	meta := new(SourceMetadata)
	if decErr := json.Unmarshal(raw, meta); decErr != nil {
		return nil, fmt.Errorf("failed to unmarshal metadata: %w", decErr)
	}

	return meta, nil
}
// Delete removes the metadata file for host and pathHash. Deleting metadata
// that does not exist is not an error; any other removal failure is returned
// wrapped.
func (s *MetadataStorage) Delete(host string, pathHash PathHash) error {
	rmErr := os.Remove(s.metaPath(host, pathHash))
	if rmErr == nil || os.IsNotExist(rmErr) {
		return nil
	}
	return fmt.Errorf("failed to delete metadata: %w", rmErr)
}
// Exists reports whether the metadata file for host and pathHash can be
// stat'ed. Any Stat error, not only "does not exist", yields false.
func (s *MetadataStorage) Exists(host string, pathHash PathHash) bool {
	if _, statErr := os.Stat(s.metaPath(host, pathHash)); statErr != nil {
		return false
	}
	return true
}
// metaPath builds the metadata file location:
// <basedir>/<host>/<path_hash>.json
//
// NOTE(review): host is joined into the path as-is. filepath.Join cleans the
// result, so a host such as ".." would resolve outside baseDir; this relies on
// callers passing a validated hostname — confirm upstream.
func (s *MetadataStorage) metaPath(host string, pathHash PathHash) string {
	fileName := string(pathHash) + ".json"
	return filepath.Join(s.baseDir, host, fileName)
}
// HashPath returns the hex-encoded SHA256 digest of the bytes of path.
func HashPath(path string) PathHash {
	hasher := sha256.New()
	// hash.Hash documents that Write never returns an error.
	_, _ = hasher.Write([]byte(path))

	return PathHash(hex.EncodeToString(hasher.Sum(nil)))
}
// CacheKey derives the storage key for a request variant.
// Format: sha256(host:path:query:width:height:format:quality:fit_mode),
// hex-encoded. req must be non-nil.
//
// NOTE(review): the fields are joined with ':' and not escaped, so values that
// themselves contain ':' can yield the same pre-hash string for different
// requests (e.g. path "/x:y" with an empty query vs. path "/x" with query
// "y:"). Changing the format would invalidate every variant already on disk,
// so it is left as-is here — confirm whether inputs are constrained upstream.
func CacheKey(req *ImageRequest) VariantKey {
	hasher := sha256.New()

	// hash.Hash writes never fail, so the Fprintf result is safe to discard.
	_, _ = fmt.Fprintf(hasher, "%s:%s:%s:%d:%d:%s:%d:%s",
		req.SourceHost,
		req.SourcePath,
		req.SourceQuery,
		req.Size.Width,
		req.Size.Height,
		req.Format,
		req.Quality,
		req.FitMode,
	)

	return VariantKey(hex.EncodeToString(hasher.Sum(nil)))
}
// VariantStorage handles key-based file storage for processed image variants.
// Files are stored at: <basedir>/<ab>/<cd>/<cache_key>
// Metadata is stored at: <basedir>/<ab>/<cd>/<cache_key>.meta
// Unlike ContentStorage, the key is provided by the caller (not computed from content).
type VariantStorage struct {
// baseDir is the root directory under which variant files and their .meta
// sidecars are placed.
baseDir string
}
// VariantMeta contains metadata about a cached variant.
// VariantStorage.Store serializes it as JSON into the "<cache_key>.meta" file
// next to the variant content.
type VariantMeta struct {
// ContentType is the content type passed to VariantStorage.Store;
// LoadWithMeta returns it when present and non-empty.
ContentType string `json:"content_type"`
// Size is the length of the stored content in bytes at store time.
Size int64 `json:"size"`
// CreatedAt is the store time as Unix seconds.
CreatedAt int64 `json:"created_at"`
}
// NewVariantStorage returns a VariantStorage rooted at baseDir. The directory
// (and any missing parents) is created with StorageDirPerm; a failure to do so
// is returned wrapped and no storage is constructed.
func NewVariantStorage(baseDir string) (*VariantStorage, error) {
	mkErr := os.MkdirAll(baseDir, StorageDirPerm)
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create variant storage directory: %w", mkErr)
	}

	storage := &VariantStorage{baseDir: baseDir}

	return storage, nil
}
// Store writes the bytes read from r to <basedir>/<ab>/<cd>/<key> and records
// contentType, the size and the current time in the "<key>.meta" sidecar. It
// returns the number of content bytes stored.
//
// Both files are written to a temporary file in the destination directory and
// renamed into place, so a concurrent reader never sees a partially written
// content file or sidecar. The sidecar is published before the content so that
// a freshly stored variant is never visible without its content type.
//
// Writing the sidecar is best-effort: if it fails the content is still stored
// and Store succeeds (LoadWithMeta then falls back to a default content type).
// Failures on the content path are returned wrapped, and the content temp file
// is removed.
func (s *VariantStorage) Store(key VariantKey, r io.Reader, contentType string) (size int64, err error) {
	data, err := io.ReadAll(r)
	if err != nil {
		return 0, fmt.Errorf("failed to read content: %w", err)
	}
	size = int64(len(data))

	path := s.keyToPath(key)

	// Create directory structure
	dir := filepath.Dir(path)
	if err = os.MkdirAll(dir, StorageDirPerm); err != nil {
		return 0, fmt.Errorf("failed to create directory: %w", err)
	}

	// Marshal up front so an encoding failure cannot leave content on disk
	// while the call reports an error.
	metaData, err := json.Marshal(VariantMeta{
		ContentType: contentType,
		Size:        size,
		CreatedAt:   time.Now().UTC().Unix(),
	})
	if err != nil {
		return 0, fmt.Errorf("failed to marshal metadata: %w", err)
	}

	// Stage the content next to its destination.
	tmpPath, err := writeVariantTemp(dir, data)
	if err != nil {
		return 0, err
	}

	// Publish the sidecar first (best-effort, errors intentionally ignored).
	if metaTmp, metaErr := writeVariantTemp(dir, metaData); metaErr == nil {
		// CreateTemp creates files as 0600; restore the sidecar's 0640 mode.
		metaErr = os.Chmod(metaTmp, 0640)
		if metaErr == nil {
			metaErr = os.Rename(metaTmp, path+".meta")
		}
		if metaErr != nil {
			_ = os.Remove(metaTmp)
		}
	}

	// Atomic rename content
	if err = os.Rename(tmpPath, path); err != nil {
		_ = os.Remove(tmpPath)
		return 0, fmt.Errorf("failed to rename temp file: %w", err)
	}

	return size, nil
}

// writeVariantTemp writes data to a new temporary file in dir and returns its
// path. If writing or closing fails the temporary file is removed.
func writeVariantTemp(dir string, data []byte) (string, error) {
	tmpFile, err := os.CreateTemp(dir, ".tmp-*")
	if err != nil {
		return "", fmt.Errorf("failed to create temp file: %w", err)
	}
	tmpPath := tmpFile.Name()

	if _, err := tmpFile.Write(data); err != nil {
		_ = tmpFile.Close()
		_ = os.Remove(tmpPath)
		return "", fmt.Errorf("failed to write content: %w", err)
	}
	if err := tmpFile.Close(); err != nil {
		_ = os.Remove(tmpPath)
		return "", fmt.Errorf("failed to close temp file: %w", err)
	}

	return tmpPath, nil
}
// Load opens the variant stored under key and returns it as a reader that the
// caller must close. It returns ErrNotFound when no file exists for the key;
// any other open failure is returned wrapped.
func (s *VariantStorage) Load(key VariantKey) (io.ReadCloser, error) {
	file, openErr := os.Open(s.keyToPath(key)) //nolint:gosec // path derived from cache key
	switch {
	case openErr == nil:
		return file, nil
	case os.IsNotExist(openErr):
		return nil, ErrNotFound
	default:
		return nil, fmt.Errorf("failed to open content: %w", openErr)
	}
}
// LoadWithMeta opens the variant stored under key and returns the reader
// (which the caller must close), the file size in bytes as reported by Stat,
// and the content type.
//
// The content type comes from the "<key>.meta" sidecar. If the sidecar is
// missing, unreadable, not valid JSON, or has an empty content type,
// "application/octet-stream" is returned instead; sidecar problems are never
// reported as errors. ErrNotFound is returned when the content file itself
// does not exist.
func (s *VariantStorage) LoadWithMeta(key VariantKey) (io.ReadCloser, int64, string, error) {
	contentPath := s.keyToPath(key)

	file, openErr := os.Open(contentPath) //nolint:gosec // path derived from cache key
	if os.IsNotExist(openErr) {
		return nil, 0, "", ErrNotFound
	}
	if openErr != nil {
		return nil, 0, "", fmt.Errorf("failed to open content: %w", openErr)
	}

	info, statErr := file.Stat()
	if statErr != nil {
		_ = file.Close()
		return nil, 0, "", fmt.Errorf("failed to stat content: %w", statErr)
	}

	// Start from the fallback and override only with a usable sidecar value.
	contentType := "application/octet-stream"
	if raw, readErr := os.ReadFile(contentPath + ".meta"); readErr == nil { //nolint:gosec // path derived from cache key
		var meta VariantMeta
		if decErr := json.Unmarshal(raw, &meta); decErr == nil && meta.ContentType != "" {
			contentType = meta.ContentType
		}
	}

	return file, info.Size(), contentType, nil
}
// Exists reports whether a content file for key can be stat'ed. Any Stat
// error, not only "does not exist", yields false. The .meta sidecar is not
// consulted.
func (s *VariantStorage) Exists(key VariantKey) bool {
	if _, statErr := os.Stat(s.keyToPath(key)); statErr != nil {
		return false
	}
	return true
}
// Delete removes the variant stored under key together with its
// "<key>.meta" sidecar. Files that do not exist are not an error; any other
// removal failure is returned wrapped.
//
// The content file is removed first so that a concurrent reader never finds
// content whose sidecar has already gone.
func (s *VariantStorage) Delete(key VariantKey) error {
	path := s.keyToPath(key)

	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to delete content: %w", err)
	}

	// Store writes a sidecar next to the content; remove it too so deleted
	// variants do not leave orphaned .meta files behind.
	if err := os.Remove(path + ".meta"); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to delete metadata: %w", err)
	}

	return nil
}
// keyToPath maps a key to its on-disk location,
// <basedir>/<ab>/<cd>/<key>, where <ab> and <cd> are the first two character
// pairs of the key. Keys shorter than MinHashLength cannot be split and are
// placed directly under the base directory.
func (s *VariantStorage) keyToPath(key VariantKey) string {
	name := string(key)

	parts := []string{s.baseDir}
	if len(name) >= MinHashLength {
		parts = append(parts, name[:2], name[2:4])
	}
	parts = append(parts, name)

	return filepath.Join(parts...)
}