Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions

View File

@@ -11,10 +11,10 @@ import (
// Config represents the application configuration
type Config struct {
AgeRecipient string `yaml:"age_recipient"`
AgeRecipients []string `yaml:"age_recipients"`
BackupInterval time.Duration `yaml:"backup_interval"`
BlobSizeLimit int64 `yaml:"blob_size_limit"`
ChunkSize int64 `yaml:"chunk_size"`
BlobSizeLimit Size `yaml:"blob_size_limit"`
ChunkSize Size `yaml:"chunk_size"`
Exclude []string `yaml:"exclude"`
FullScanInterval time.Duration `yaml:"full_scan_interval"`
Hostname string `yaml:"hostname"`
@@ -35,7 +35,7 @@ type S3Config struct {
SecretAccessKey string `yaml:"secret_access_key"`
Region string `yaml:"region"`
UseSSL bool `yaml:"use_ssl"`
PartSize int64 `yaml:"part_size"`
PartSize Size `yaml:"part_size"`
}
// ConfigPath wraps the config file path for fx injection
@@ -64,8 +64,8 @@ func Load(path string) (*Config, error) {
cfg := &Config{
// Set defaults
BlobSizeLimit: 10 * 1024 * 1024 * 1024, // 10GB
ChunkSize: 10 * 1024 * 1024, // 10MB
BlobSizeLimit: Size(10 * 1024 * 1024 * 1024), // 10GB
ChunkSize: Size(10 * 1024 * 1024), // 10MB
BackupInterval: 1 * time.Hour,
FullScanInterval: 24 * time.Hour,
MinTimeBetweenRun: 15 * time.Minute,
@@ -97,7 +97,7 @@ func Load(path string) (*Config, error) {
cfg.S3.Region = "us-east-1"
}
if cfg.S3.PartSize == 0 {
cfg.S3.PartSize = 5 * 1024 * 1024 // 5MB
cfg.S3.PartSize = Size(5 * 1024 * 1024) // 5MB
}
if err := cfg.Validate(); err != nil {
@@ -109,8 +109,8 @@ func Load(path string) (*Config, error) {
// Validate checks if the configuration is valid
func (c *Config) Validate() error {
if c.AgeRecipient == "" {
return fmt.Errorf("age_recipient is required")
if len(c.AgeRecipients) == 0 {
return fmt.Errorf("at least one age_recipient is required")
}
if len(c.SourceDirs) == 0 {
@@ -133,11 +133,11 @@ func (c *Config) Validate() error {
return fmt.Errorf("s3.secret_access_key is required")
}
if c.ChunkSize < 1024*1024 { // 1MB minimum
if c.ChunkSize.Int64() < 1024*1024 { // 1MB minimum
return fmt.Errorf("chunk_size must be at least 1MB")
}
if c.BlobSizeLimit < c.ChunkSize {
if c.BlobSizeLimit.Int64() < c.ChunkSize.Int64() {
return fmt.Errorf("blob_size_limit must be at least chunk_size")
}

View File

@@ -6,6 +6,12 @@ import (
"testing"
)
// Age key fixtures for the config tests. TEST_SNEAK_AGE_PUBLIC_KEY is the
// value TestConfigLoad expects as the first configured age recipient.
// NOTE(review): the private key below is committed in plain text; presumably
// it is a throwaway test-only key — confirm it protects no real data.
const (
TEST_SNEAK_AGE_PUBLIC_KEY = "age1278m9q7dp3chsh2dcy82qk27v047zywyvtxwnj4cvt0z65jw6a7q5dqhfj"
TEST_INTEGRATION_AGE_PUBLIC_KEY = "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
TEST_INTEGRATION_AGE_PRIVATE_KEY = "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
)
func TestMain(m *testing.M) {
// Set up test environment
testConfigPath := filepath.Join("..", "..", "test", "config.yaml")
@@ -32,8 +38,11 @@ func TestConfigLoad(t *testing.T) {
}
// Basic validation
if cfg.AgeRecipient != "age1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" {
t.Errorf("Expected age recipient to be set, got '%s'", cfg.AgeRecipient)
if len(cfg.AgeRecipients) != 2 {
t.Errorf("Expected 2 age recipients, got %d", len(cfg.AgeRecipients))
}
if cfg.AgeRecipients[0] != TEST_SNEAK_AGE_PUBLIC_KEY {
t.Errorf("Expected first age recipient to be %s, got '%s'", TEST_SNEAK_AGE_PUBLIC_KEY, cfg.AgeRecipients[0])
}
if len(cfg.SourceDirs) != 2 {

45
internal/config/size.go Normal file
View File

@@ -0,0 +1,45 @@
package config
import (
"fmt"
"github.com/dustin/go-humanize"
)
// Size is a byte count backed by int64. In YAML it may be written either as a
// plain integer or as a string that go-humanize can parse (see UnmarshalYAML).
type Size int64
// UnmarshalYAML implements yaml.Unmarshaler for Size.
//
// The value is first decoded as an int64 and, if that succeeds, stored
// unchanged as a byte count. Otherwise it is decoded as a string and parsed
// with humanize.ParseBytes. An error is returned when the value is neither an
// integer nor a string, when the string cannot be parsed, or when the parsed
// value does not fit in the int64 that backs Size.
func (s *Size) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// Largest byte count an int64-backed Size can represent.
	const maxSize = 1<<63 - 1

	// Try to unmarshal as int64 first.
	var intVal int64
	if err := unmarshal(&intVal); err == nil {
		*s = Size(intVal)
		return nil
	}

	// Fall back to a string; keep the decoder's error for diagnosis.
	var strVal string
	if err := unmarshal(&strVal); err != nil {
		return fmt.Errorf("size must be a number or string: %w", err)
	}

	// Parse the string using go-humanize.
	parsed, err := humanize.ParseBytes(strVal)
	if err != nil {
		return fmt.Errorf("invalid size format: %w", err)
	}

	// ParseBytes yields a uint64; converting a value above maxSize would wrap
	// around to a negative Size, so reject it explicitly.
	if parsed > maxSize {
		return fmt.Errorf("invalid size format: %q overflows int64", strVal)
	}

	*s = Size(parsed)
	return nil
}
// Int64 returns the size as a plain int64, i.e. the underlying value of the
// Size without any scaling or formatting.
func (s Size) Int64() int64 {
return int64(s)
}
// String returns the size as a human-readable string produced by
// humanize.Bytes.
//
// humanize.Bytes takes an unsigned count, so a negative Size is rendered from
// its magnitude with a leading "-" rather than being reinterpreted as a huge
// unsigned value.
func (s Size) String() string {
	if s < 0 {
		// uint64(-s) is the correct magnitude even for the minimum int64,
		// where -s wraps back to the same bit pattern.
		return "-" + humanize.Bytes(uint64(-s))
	}
	return humanize.Bytes(uint64(s))
}