Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created when packing starts, so chunks can be associated with them immediately
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Made encryption mandatory for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README

This commit is contained in:
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions

View File

@@ -11,10 +11,10 @@ import (
// Config represents the application configuration
type Config struct {
AgeRecipient string `yaml:"age_recipient"`
AgeRecipients []string `yaml:"age_recipients"`
BackupInterval time.Duration `yaml:"backup_interval"`
BlobSizeLimit int64 `yaml:"blob_size_limit"`
ChunkSize int64 `yaml:"chunk_size"`
BlobSizeLimit Size `yaml:"blob_size_limit"`
ChunkSize Size `yaml:"chunk_size"`
Exclude []string `yaml:"exclude"`
FullScanInterval time.Duration `yaml:"full_scan_interval"`
Hostname string `yaml:"hostname"`
@@ -35,7 +35,7 @@ type S3Config struct {
SecretAccessKey string `yaml:"secret_access_key"`
Region string `yaml:"region"`
UseSSL bool `yaml:"use_ssl"`
PartSize int64 `yaml:"part_size"`
PartSize Size `yaml:"part_size"`
}
// ConfigPath wraps the config file path for fx injection
@@ -64,8 +64,8 @@ func Load(path string) (*Config, error) {
cfg := &Config{
// Set defaults
BlobSizeLimit: 10 * 1024 * 1024 * 1024, // 10GB
ChunkSize: 10 * 1024 * 1024, // 10MB
BlobSizeLimit: Size(10 * 1024 * 1024 * 1024), // 10GB
ChunkSize: Size(10 * 1024 * 1024), // 10MB
BackupInterval: 1 * time.Hour,
FullScanInterval: 24 * time.Hour,
MinTimeBetweenRun: 15 * time.Minute,
@@ -97,7 +97,7 @@ func Load(path string) (*Config, error) {
cfg.S3.Region = "us-east-1"
}
if cfg.S3.PartSize == 0 {
cfg.S3.PartSize = 5 * 1024 * 1024 // 5MB
cfg.S3.PartSize = Size(5 * 1024 * 1024) // 5MB
}
if err := cfg.Validate(); err != nil {
@@ -109,8 +109,8 @@ func Load(path string) (*Config, error) {
// Validate checks if the configuration is valid
func (c *Config) Validate() error {
if c.AgeRecipient == "" {
return fmt.Errorf("age_recipient is required")
if len(c.AgeRecipients) == 0 {
return fmt.Errorf("at least one age_recipient is required")
}
if len(c.SourceDirs) == 0 {
@@ -133,11 +133,11 @@ func (c *Config) Validate() error {
return fmt.Errorf("s3.secret_access_key is required")
}
if c.ChunkSize < 1024*1024 { // 1MB minimum
if c.ChunkSize.Int64() < 1024*1024 { // 1MB minimum
return fmt.Errorf("chunk_size must be at least 1MB")
}
if c.BlobSizeLimit < c.ChunkSize {
if c.BlobSizeLimit.Int64() < c.ChunkSize.Int64() {
return fmt.Errorf("blob_size_limit must be at least chunk_size")
}