Add content-addressable storage and cache key generation

ContentStorage stores blobs at <dir>/<ab>/<cd>/<sha256> paths.
MetadataStorage stores JSON at <dir>/<host>/<path_hash>.json.
CacheKey generates unique keys from request parameters.
This commit is contained in:
2026-01-08 03:35:50 -08:00
parent 4595929275
commit 2f20c71da0
2 changed files with 650 additions and 0 deletions

View File

@@ -0,0 +1,297 @@
package imgcache
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
)
// Storage constants.
const (
	// StorageDirPerm is the permission mode passed to os.MkdirAll whenever a
	// storage directory is created (rwxr-x---).
	StorageDirPerm = 0o750

	// MinHashLength is the shortest hash that gets split into the two
	// two-character directory levels of a content path.
	MinHashLength = 4
)
// ErrNotFound is the sentinel error returned by the storage Load methods when
// the requested file does not exist on disk.
var ErrNotFound = errors.New("content not found")
// ContentStorage handles content-addressable file storage.
// Files are stored at: <basedir>/<ab>/<cd>/<abcdef...sha256>
// where <ab> and <cd> are the first and second pairs of characters of the
// hex-encoded SHA-256 hash of the file's content.
type ContentStorage struct {
// baseDir is the root directory under which all content files are placed.
baseDir string
}
// NewContentStorage returns a ContentStorage rooted at baseDir.
//
// The directory, including any missing parents, is created with
// StorageDirPerm. If it cannot be created the wrapped error is returned and
// the storage is nil.
func NewContentStorage(baseDir string) (*ContentStorage, error) {
	mkErr := os.MkdirAll(baseDir, StorageDirPerm)
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create storage directory: %w", mkErr)
	}

	store := &ContentStorage{baseDir: baseDir}
	return store, nil
}
// Store saves everything read from r under its SHA-256 hash and reports the
// hex-encoded hash together with the number of bytes read.
//
// The whole payload is buffered in memory so the hash (and therefore the
// destination path) is known before anything is written. If a file already
// exists at that path the call returns immediately without rewriting it.
// Otherwise the payload is written to a temporary file in the destination
// directory and renamed into place, so readers never observe a partial file.
// On any failure the returned hash is empty and the size is zero.
func (s *ContentStorage) Store(r io.Reader) (hash string, size int64, err error) {
	payload, err := io.ReadAll(r)
	if err != nil {
		return "", 0, fmt.Errorf("failed to read content: %w", err)
	}

	digest := sha256.Sum256(payload)
	hash = hex.EncodeToString(digest[:])
	size = int64(len(payload))

	// Destination: <basedir>/<ab>/<cd>/<hash>
	target := s.hashToPath(hash)

	// Identical content is already stored; nothing to do.
	if _, statErr := os.Stat(target); statErr == nil {
		return hash, size, nil
	}

	parent := filepath.Dir(target)
	if mkErr := os.MkdirAll(parent, StorageDirPerm); mkErr != nil {
		return "", 0, fmt.Errorf("failed to create directory: %w", mkErr)
	}

	// The temp file lives next to the target so the final rename stays on
	// the same filesystem.
	tmp, err := os.CreateTemp(parent, ".tmp-*")
	if err != nil {
		return "", 0, fmt.Errorf("failed to create temp file: %w", err)
	}
	tmpName := tmp.Name()

	// Remove the temp file on every exit path except a successful rename.
	renamed := false
	defer func() {
		if !renamed {
			_ = os.Remove(tmpName)
		}
	}()

	if _, writeErr := tmp.Write(payload); writeErr != nil {
		_ = tmp.Close()
		return "", 0, fmt.Errorf("failed to write content: %w", writeErr)
	}

	if closeErr := tmp.Close(); closeErr != nil {
		return "", 0, fmt.Errorf("failed to close temp file: %w", closeErr)
	}

	if renameErr := os.Rename(tmpName, target); renameErr != nil {
		return "", 0, fmt.Errorf("failed to rename temp file: %w", renameErr)
	}
	renamed = true

	return hash, size, nil
}
// Load opens the stored content identified by hash for reading.
//
// The caller owns the returned ReadCloser and must close it. ErrNotFound is
// returned when no file exists for the hash; any other open failure is
// returned wrapped.
func (s *ContentStorage) Load(hash string) (io.ReadCloser, error) {
	file, openErr := os.Open(s.hashToPath(hash)) //nolint:gosec // content-addressable path from hash
	switch {
	case openErr == nil:
		return file, nil
	case os.IsNotExist(openErr):
		return nil, ErrNotFound
	default:
		return nil, fmt.Errorf("failed to open content: %w", openErr)
	}
}
// Delete removes the stored content identified by hash.
//
// Deleting content that does not exist is not an error; any other removal
// failure is returned wrapped.
func (s *ContentStorage) Delete(hash string) error {
	rmErr := os.Remove(s.hashToPath(hash))
	if rmErr == nil || os.IsNotExist(rmErr) {
		return nil
	}
	return fmt.Errorf("failed to delete content: %w", rmErr)
}
// Exists reports whether a file is present at the path derived from hash.
// Any stat failure, not only "does not exist", is reported as false.
func (s *ContentStorage) Exists(hash string) bool {
	if _, statErr := os.Stat(s.hashToPath(hash)); statErr != nil {
		return false
	}
	return true
}
// Path returns the on-disk location used for the given hash. It only computes
// the path; it does not check that the file exists.
func (s *ContentStorage) Path(hash string) string {
	location := s.hashToPath(hash)
	return location
}
// hashToPath maps a hash to its file path: <basedir>/<ab>/<cd>/<hash>, where
// <ab> and <cd> are the first two pairs of characters of the hash. A hash
// shorter than MinHashLength cannot be split and is placed directly under
// the base directory.
func (s *ContentStorage) hashToPath(hash string) string {
	segments := []string{s.baseDir}
	if len(hash) >= MinHashLength {
		segments = append(segments, hash[:2], hash[2:4])
	}
	segments = append(segments, hash)
	return filepath.Join(segments...)
}
// MetadataStorage handles JSON metadata file storage.
// Files are stored at: <basedir>/<hostname>/<path_hash>.json
// so all metadata for one host is grouped in a single directory.
type MetadataStorage struct {
// baseDir is the root directory under which the per-host directories live.
baseDir string
}
// NewMetadataStorage returns a MetadataStorage rooted at baseDir.
//
// The directory, including any missing parents, is created with
// StorageDirPerm. If it cannot be created the wrapped error is returned and
// the storage is nil.
func NewMetadataStorage(baseDir string) (*MetadataStorage, error) {
	mkErr := os.MkdirAll(baseDir, StorageDirPerm)
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create metadata directory: %w", mkErr)
	}

	store := &MetadataStorage{baseDir: baseDir}
	return store, nil
}
// SourceMetadata represents cached metadata about a source URL.
// It is the record that MetadataStorage serializes to and from JSON; fields
// tagged omitempty are left out of the JSON when they hold their zero value.
type SourceMetadata struct {
// Host, Path and Query identify the source URL.
// NOTE(review): presumably the URL's host, path and raw query string — confirm against the caller.
Host string `json:"host"`
Path string `json:"path"`
Query string `json:"query,omitempty"`
// ContentHash presumably holds the hash returned by ContentStorage.Store for the fetched body — TODO confirm.
ContentHash string `json:"content_hash,omitempty"`
// StatusCode, ContentType and ResponseHeaders presumably mirror the upstream HTTP response — TODO confirm.
StatusCode int `json:"status_code"`
ContentType string `json:"content_type,omitempty"`
ResponseHeaders map[string][]string `json:"response_headers,omitempty"`
// FetchedAt and ExpiresAt are integer timestamps; the unit is not visible here
// (the tests use a value that looks like Unix seconds) — TODO confirm.
FetchedAt int64 `json:"fetched_at"`
ExpiresAt int64 `json:"expires_at,omitempty"`
// ETag and LastModified presumably carry the upstream cache validators — TODO confirm.
ETag string `json:"etag,omitempty"`
LastModified string `json:"last_modified,omitempty"`
}
// Store writes meta as indented JSON to <basedir>/<host>/<pathHash>.json,
// creating the host directory if necessary and replacing any existing file.
//
// The JSON is written to a temporary file in the destination directory and
// then renamed over the final path, so readers never observe a partially
// written file. If writing, closing or renaming fails, the temporary file is
// removed before the wrapped error is returned.
func (s *MetadataStorage) Store(host, pathHash string, meta *SourceMetadata) error {
	path := s.metaPath(host, pathHash)

	// Create directory structure
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, StorageDirPerm); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}

	// Marshal to JSON
	data, err := json.MarshalIndent(meta, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal metadata: %w", err)
	}

	// Write to temp file first, then rename for atomicity. The temp file is
	// created next to the target so the rename stays on one filesystem.
	tmpFile, err := os.CreateTemp(dir, ".tmp-*.json")
	if err != nil {
		return fmt.Errorf("failed to create temp file: %w", err)
	}
	tmpPath := tmpFile.Name()

	// Track success explicitly instead of inspecting err in the deferred
	// function: the error checks below declare their own shadowed err, so
	// the outer err stays nil on those failures and cannot drive cleanup.
	committed := false
	defer func() {
		if !committed {
			// Best-effort cleanup; the error being returned is the one that matters.
			_ = os.Remove(tmpPath)
		}
	}()

	if _, err := tmpFile.Write(data); err != nil {
		_ = tmpFile.Close()
		return fmt.Errorf("failed to write metadata: %w", err)
	}

	if err := tmpFile.Close(); err != nil {
		return fmt.Errorf("failed to close temp file: %w", err)
	}

	// Atomic rename
	if err := os.Rename(tmpPath, path); err != nil {
		return fmt.Errorf("failed to rename temp file: %w", err)
	}
	committed = true

	return nil
}
// Load reads and decodes the metadata stored for host and pathHash.
//
// ErrNotFound is returned when no metadata file exists. Read failures and
// JSON decoding failures are returned wrapped, with a nil result.
func (s *MetadataStorage) Load(host, pathHash string) (*SourceMetadata, error) {
	raw, readErr := os.ReadFile(s.metaPath(host, pathHash)) //nolint:gosec // path derived from host+hash
	switch {
	case readErr == nil:
		// File read successfully; decode it below.
	case os.IsNotExist(readErr):
		return nil, ErrNotFound
	default:
		return nil, fmt.Errorf("failed to read metadata: %w", readErr)
	}

	decoded := new(SourceMetadata)
	if decodeErr := json.Unmarshal(raw, decoded); decodeErr != nil {
		return nil, fmt.Errorf("failed to unmarshal metadata: %w", decodeErr)
	}
	return decoded, nil
}
// Delete removes the metadata file for host and pathHash.
//
// Deleting metadata that does not exist is not an error; any other removal
// failure is returned wrapped.
func (s *MetadataStorage) Delete(host, pathHash string) error {
	rmErr := os.Remove(s.metaPath(host, pathHash))
	if rmErr == nil || os.IsNotExist(rmErr) {
		return nil
	}
	return fmt.Errorf("failed to delete metadata: %w", rmErr)
}
// Exists reports whether a metadata file is present for host and pathHash.
// Any stat failure, not only "does not exist", is reported as false.
func (s *MetadataStorage) Exists(host, pathHash string) bool {
	if _, statErr := os.Stat(s.metaPath(host, pathHash)); statErr != nil {
		return false
	}
	return true
}
// metaPath builds the metadata file location for a host and path hash:
// <basedir>/<host>/<path_hash>.json
func (s *MetadataStorage) metaPath(host, pathHash string) string {
	fileName := pathHash + ".json"
	return filepath.Join(s.baseDir, host, fileName)
}
// HashPath returns the lowercase hex-encoded SHA-256 digest of path. The
// result is always 64 characters long.
func HashPath(path string) string {
	hasher := sha256.New()
	hasher.Write([]byte(path)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// CacheKey derives the cache key for an image request as a hex-encoded
// SHA-256 digest.
// Format: sha256(host:path:query:width:height:format:quality:fit_mode)
//
// NOTE(review): the fields are joined with ":" without escaping, so two
// requests whose string fields contain ":" in different places can produce
// the same pre-hash string. Confirm whether callers can supply such values.
func CacheKey(req *ImageRequest) string {
	hasher := sha256.New()
	// Stream the formatted fields straight into the hasher; the bytes hashed
	// are exactly the colon-separated string described above.
	fmt.Fprintf(hasher, "%s:%s:%s:%d:%d:%s:%d:%s",
		req.SourceHost,
		req.SourcePath,
		req.SourceQuery,
		req.Size.Width,
		req.Size.Height,
		req.Format,
		req.Quality,
		req.FitMode,
	)
	return hex.EncodeToString(hasher.Sum(nil))
}

View File

@@ -0,0 +1,353 @@
package imgcache
import (
"bytes"
"io"
"os"
"path/filepath"
"testing"
)
// TestContentStorage_StoreAndLoad stores a small payload, checks the reported
// size and hash, verifies the file landed at <dir>/<ab>/<cd>/<hash>, and
// confirms that Load returns the same bytes.
func TestContentStorage_StoreAndLoad(t *testing.T) {
	tmpDir := t.TempDir()
	storage, err := NewContentStorage(tmpDir)
	if err != nil {
		t.Fatalf("NewContentStorage() error = %v", err)
	}

	content := []byte("hello world")
	hash, size, err := storage.Store(bytes.NewReader(content))
	if err != nil {
		t.Fatalf("Store() error = %v", err)
	}
	if size != int64(len(content)) {
		t.Errorf("Store() size = %d, want %d", size, len(content))
	}
	// The hash is sliced below to rebuild the expected path, so a hash that
	// is empty or too short must stop the test instead of panicking.
	if len(hash) < MinHashLength {
		t.Fatalf("Store() returned hash %q, want at least %d characters", hash, MinHashLength)
	}

	// Verify file exists at expected path
	expectedPath := filepath.Join(tmpDir, hash[0:2], hash[2:4], hash)
	if _, err := os.Stat(expectedPath); err != nil {
		t.Errorf("File not at expected path %s: %v", expectedPath, err)
	}

	// Load and verify content
	r, err := storage.Load(hash)
	if err != nil {
		t.Fatalf("Load() error = %v", err)
	}
	defer r.Close()

	loaded, err := io.ReadAll(r)
	if err != nil {
		t.Fatalf("ReadAll() error = %v", err)
	}
	if !bytes.Equal(loaded, content) {
		t.Errorf("Load() content = %q, want %q", loaded, content)
	}
}
// TestContentStorage_StoreIdempotent stores the same payload twice and expects
// both calls to succeed and report the same hash.
func TestContentStorage_StoreIdempotent(t *testing.T) {
	store, err := NewContentStorage(t.TempDir())
	if err != nil {
		t.Fatalf("NewContentStorage() error = %v", err)
	}

	payload := []byte("same content")

	var hashes [2]string
	for i, attempt := range []string{"first", "second"} {
		got, _, storeErr := store.Store(bytes.NewReader(payload))
		if storeErr != nil {
			t.Fatalf("Store() %s error = %v", attempt, storeErr)
		}
		hashes[i] = got
	}

	if hashes[0] != hashes[1] {
		t.Errorf("Store() hashes differ: %s vs %s", hashes[0], hashes[1])
	}
}
// TestContentStorage_LoadNotFound expects Load to return ErrNotFound for a
// hash that was never stored.
func TestContentStorage_LoadNotFound(t *testing.T) {
	store, err := NewContentStorage(t.TempDir())
	if err != nil {
		t.Fatalf("NewContentStorage() error = %v", err)
	}

	if _, loadErr := store.Load("nonexistent"); loadErr != ErrNotFound {
		t.Errorf("Load() error = %v, want ErrNotFound", loadErr)
	}
}
// TestContentStorage_Delete stores a payload, confirms Exists sees it, deletes
// it, and confirms Exists no longer sees it.
func TestContentStorage_Delete(t *testing.T) {
	store, err := NewContentStorage(t.TempDir())
	if err != nil {
		t.Fatalf("NewContentStorage() error = %v", err)
	}

	stored, _, err := store.Store(bytes.NewReader([]byte("to be deleted")))
	if err != nil {
		t.Fatalf("Store() error = %v", err)
	}

	if present := store.Exists(stored); !present {
		t.Error("Exists() = false, want true")
	}

	if delErr := store.Delete(stored); delErr != nil {
		t.Fatalf("Delete() error = %v", delErr)
	}

	if present := store.Exists(stored); present {
		t.Error("Exists() = true after delete, want false")
	}
}
// TestContentStorage_DeleteNonexistent expects Delete to succeed silently when
// there is nothing stored for the hash.
func TestContentStorage_DeleteNonexistent(t *testing.T) {
	store, err := NewContentStorage(t.TempDir())
	if err != nil {
		t.Fatalf("NewContentStorage() error = %v", err)
	}

	// A missing entry must not be reported as a failure.
	delErr := store.Delete("nonexistent")
	if delErr != nil {
		t.Errorf("Delete() error = %v, want nil", delErr)
	}
}
// TestContentStorage_Path expects Path to place a hash under two directory
// levels named after its first two character pairs.
func TestContentStorage_Path(t *testing.T) {
	root := t.TempDir()
	store, err := NewContentStorage(root)
	if err != nil {
		t.Fatalf("NewContentStorage() error = %v", err)
	}

	const hash = "abcdef0123456789"
	want := filepath.Join(root, "ab", "cd", hash)

	if got := store.Path(hash); got != want {
		t.Errorf("Path() = %q, want %q", got, want)
	}
}
// TestMetadataStorage_StoreAndLoad writes a metadata record, verifies the file
// landed at <dir>/<host>/<path_hash>.json, and checks that the loaded record
// matches the stored one on the fields under test.
func TestMetadataStorage_StoreAndLoad(t *testing.T) {
	const (
		host    = "cdn.example.com"
		srcPath = "/photos/cat.jpg"
	)

	root := t.TempDir()
	store, err := NewMetadataStorage(root)
	if err != nil {
		t.Fatalf("NewMetadataStorage() error = %v", err)
	}

	want := &SourceMetadata{
		Host:        host,
		Path:        srcPath,
		ContentHash: "abc123",
		StatusCode:  200,
		ContentType: "image/jpeg",
		FetchedAt:   1704067200,
		ETag:        `"etag123"`,
	}
	pathHash := HashPath(srcPath)

	if storeErr := store.Store(host, pathHash, want); storeErr != nil {
		t.Fatalf("Store() error = %v", storeErr)
	}

	// The record must be on disk where the layout says it should be.
	onDisk := filepath.Join(root, host, pathHash+".json")
	if _, statErr := os.Stat(onDisk); statErr != nil {
		t.Errorf("File not at expected path %s: %v", onDisk, statErr)
	}

	got, err := store.Load(host, pathHash)
	if err != nil {
		t.Fatalf("Load() error = %v", err)
	}

	// expectString reports a mismatch for one string field.
	expectString := func(field, gotVal, wantVal string) {
		if gotVal != wantVal {
			t.Errorf("%s = %q, want %q", field, gotVal, wantVal)
		}
	}

	expectString("Host", got.Host, want.Host)
	expectString("Path", got.Path, want.Path)
	expectString("ContentHash", got.ContentHash, want.ContentHash)
	if got.StatusCode != want.StatusCode {
		t.Errorf("StatusCode = %d, want %d", got.StatusCode, want.StatusCode)
	}
	expectString("ETag", got.ETag, want.ETag)
}
// TestMetadataStorage_LoadNotFound expects Load to return ErrNotFound when no
// metadata was stored for the host and path hash.
func TestMetadataStorage_LoadNotFound(t *testing.T) {
	store, err := NewMetadataStorage(t.TempDir())
	if err != nil {
		t.Fatalf("NewMetadataStorage() error = %v", err)
	}

	if _, loadErr := store.Load("example.com", "nonexistent"); loadErr != ErrNotFound {
		t.Errorf("Load() error = %v, want ErrNotFound", loadErr)
	}
}
// TestMetadataStorage_Delete stores a record, confirms Exists sees it, deletes
// it, and confirms Exists no longer sees it.
func TestMetadataStorage_Delete(t *testing.T) {
	const (
		host    = "example.com"
		srcPath = "/test.jpg"
	)

	store, err := NewMetadataStorage(t.TempDir())
	if err != nil {
		t.Fatalf("NewMetadataStorage() error = %v", err)
	}

	record := &SourceMetadata{
		Host:       host,
		Path:       srcPath,
		StatusCode: 200,
	}
	pathHash := HashPath(srcPath)

	if storeErr := store.Store(host, pathHash, record); storeErr != nil {
		t.Fatalf("Store() error = %v", storeErr)
	}

	if present := store.Exists(host, pathHash); !present {
		t.Error("Exists() = false, want true")
	}

	if delErr := store.Delete(host, pathHash); delErr != nil {
		t.Fatalf("Delete() error = %v", delErr)
	}

	if present := store.Exists(host, pathHash); present {
		t.Error("Exists() = true after delete, want false")
	}
}
// TestHashPath checks that HashPath is deterministic, distinguishes different
// inputs, and yields a 64-character hex string.
func TestHashPath(t *testing.T) {
	const (
		catPath = "/photos/cat.jpg"
		dogPath = "/photos/dog.jpg"
	)

	cat := HashPath(catPath)

	// Hashing the same input again must give the same digest.
	if again := HashPath(catPath); cat != again {
		t.Errorf("HashPath() not deterministic: %s vs %s", cat, again)
	}

	// A different input must give a different digest.
	if dog := HashPath(dogPath); cat == dog {
		t.Error("HashPath() produced same hash for different inputs")
	}

	// 256 bits rendered as hex is 64 characters.
	if got := len(cat); got != 64 {
		t.Errorf("HashPath() length = %d, want 64", got)
	}
}
// TestCacheKey checks that CacheKey is deterministic for equal requests, is
// 64 hex characters long, and changes when the size, format or quality of
// the request changes.
func TestCacheKey(t *testing.T) {
	// build returns a fresh baseline request, optionally adjusted by tweak.
	build := func(tweak func(*ImageRequest)) *ImageRequest {
		req := &ImageRequest{
			SourceHost:  "cdn.example.com",
			SourcePath:  "/photos/cat.jpg",
			SourceQuery: "",
			Size:        Size{Width: 800, Height: 600},
			Format:      FormatWebP,
			Quality:     85,
			FitMode:     FitCover,
		}
		if tweak != nil {
			tweak(req)
		}
		return req
	}

	// Two independently built but equal requests must share a key.
	baseKey := CacheKey(build(nil))
	if again := CacheKey(build(nil)); baseKey != again {
		t.Errorf("CacheKey() not deterministic: %s vs %s", baseKey, again)
	}

	// Key should be 64 hex chars
	if len(baseKey) != 64 {
		t.Errorf("CacheKey() length = %d, want 64", len(baseKey))
	}

	// Each variant changes exactly one parameter and must yield a new key.
	variants := []struct {
		failure string
		tweak   func(*ImageRequest)
	}{
		{
			failure: "CacheKey() produced same key for different sizes",
			tweak:   func(r *ImageRequest) { r.Size = Size{Width: 400, Height: 300} },
		},
		{
			failure: "CacheKey() produced same key for different formats",
			tweak:   func(r *ImageRequest) { r.Format = FormatPNG },
		},
		{
			failure: "CacheKey() produced same key for different quality",
			tweak:   func(r *ImageRequest) { r.Quality = 50 },
		},
	}
	for _, v := range variants {
		if CacheKey(build(v.tweak)) == baseKey {
			t.Error(v.failure)
		}
	}
}