Refactor blob storage to use UUID primary keys and implement streaming chunking
- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
@@ -11,10 +11,10 @@ import (
|
||||
|
||||
// Config represents the application configuration
|
||||
type Config struct {
|
||||
AgeRecipient string `yaml:"age_recipient"`
|
||||
AgeRecipients []string `yaml:"age_recipients"`
|
||||
BackupInterval time.Duration `yaml:"backup_interval"`
|
||||
BlobSizeLimit int64 `yaml:"blob_size_limit"`
|
||||
ChunkSize int64 `yaml:"chunk_size"`
|
||||
BlobSizeLimit Size `yaml:"blob_size_limit"`
|
||||
ChunkSize Size `yaml:"chunk_size"`
|
||||
Exclude []string `yaml:"exclude"`
|
||||
FullScanInterval time.Duration `yaml:"full_scan_interval"`
|
||||
Hostname string `yaml:"hostname"`
|
||||
@@ -35,7 +35,7 @@ type S3Config struct {
|
||||
SecretAccessKey string `yaml:"secret_access_key"`
|
||||
Region string `yaml:"region"`
|
||||
UseSSL bool `yaml:"use_ssl"`
|
||||
PartSize int64 `yaml:"part_size"`
|
||||
PartSize Size `yaml:"part_size"`
|
||||
}
|
||||
|
||||
// ConfigPath wraps the config file path for fx injection
|
||||
@@ -64,8 +64,8 @@ func Load(path string) (*Config, error) {
|
||||
|
||||
cfg := &Config{
|
||||
// Set defaults
|
||||
BlobSizeLimit: 10 * 1024 * 1024 * 1024, // 10GB
|
||||
ChunkSize: 10 * 1024 * 1024, // 10MB
|
||||
BlobSizeLimit: Size(10 * 1024 * 1024 * 1024), // 10GB
|
||||
ChunkSize: Size(10 * 1024 * 1024), // 10MB
|
||||
BackupInterval: 1 * time.Hour,
|
||||
FullScanInterval: 24 * time.Hour,
|
||||
MinTimeBetweenRun: 15 * time.Minute,
|
||||
@@ -97,7 +97,7 @@ func Load(path string) (*Config, error) {
|
||||
cfg.S3.Region = "us-east-1"
|
||||
}
|
||||
if cfg.S3.PartSize == 0 {
|
||||
cfg.S3.PartSize = 5 * 1024 * 1024 // 5MB
|
||||
cfg.S3.PartSize = Size(5 * 1024 * 1024) // 5MB
|
||||
}
|
||||
|
||||
if err := cfg.Validate(); err != nil {
|
||||
@@ -109,8 +109,8 @@ func Load(path string) (*Config, error) {
|
||||
|
||||
// Validate checks if the configuration is valid
|
||||
func (c *Config) Validate() error {
|
||||
if c.AgeRecipient == "" {
|
||||
return fmt.Errorf("age_recipient is required")
|
||||
if len(c.AgeRecipients) == 0 {
|
||||
return fmt.Errorf("at least one age_recipient is required")
|
||||
}
|
||||
|
||||
if len(c.SourceDirs) == 0 {
|
||||
@@ -133,11 +133,11 @@ func (c *Config) Validate() error {
|
||||
return fmt.Errorf("s3.secret_access_key is required")
|
||||
}
|
||||
|
||||
if c.ChunkSize < 1024*1024 { // 1MB minimum
|
||||
if c.ChunkSize.Int64() < 1024*1024 { // 1MB minimum
|
||||
return fmt.Errorf("chunk_size must be at least 1MB")
|
||||
}
|
||||
|
||||
if c.BlobSizeLimit < c.ChunkSize {
|
||||
if c.BlobSizeLimit.Int64() < c.ChunkSize.Int64() {
|
||||
return fmt.Errorf("blob_size_limit must be at least chunk_size")
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,12 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Age key material used as fixtures by the config tests.
// These values are committed to the repository, so they are test-only and
// must never be used to protect real data.
const (
|
||||
// First age recipient the test config is expected to contain.
TEST_SNEAK_AGE_PUBLIC_KEY = "age1278m9q7dp3chsh2dcy82qk27v047zywyvtxwnj4cvt0z65jw6a7q5dqhfj"
|
||||
// Public key named for integration tests.
// NOTE(review): presumably paired with the private key below; confirm.
TEST_INTEGRATION_AGE_PUBLIC_KEY = "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
||||
// Private (secret) age key named for integration tests.
TEST_INTEGRATION_AGE_PRIVATE_KEY = "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
// Set up test environment
|
||||
testConfigPath := filepath.Join("..", "..", "test", "config.yaml")
|
||||
@@ -32,8 +38,11 @@ func TestConfigLoad(t *testing.T) {
|
||||
}
|
||||
|
||||
// Basic validation
|
||||
if cfg.AgeRecipient != "age1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" {
|
||||
t.Errorf("Expected age recipient to be set, got '%s'", cfg.AgeRecipient)
|
||||
if len(cfg.AgeRecipients) != 2 {
|
||||
t.Errorf("Expected 2 age recipients, got %d", len(cfg.AgeRecipients))
|
||||
}
|
||||
if cfg.AgeRecipients[0] != TEST_SNEAK_AGE_PUBLIC_KEY {
|
||||
t.Errorf("Expected first age recipient to be %s, got '%s'", TEST_SNEAK_AGE_PUBLIC_KEY, cfg.AgeRecipients[0])
|
||||
}
|
||||
|
||||
if len(cfg.SourceDirs) != 2 {
|
||||
|
||||
45
internal/config/size.go
Normal file
45
internal/config/size.go
Normal file
@@ -0,0 +1,45 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/dustin/go-humanize"
|
||||
)
|
||||
|
||||
// Size is a byte count stored as an int64. Its UnmarshalYAML method lets a
// YAML value be given either as an integer or as a string that is parsed
// with go-humanize.
|
||||
type Size int64
|
||||
|
||||
// UnmarshalYAML implements yaml.Unmarshaler for Size
|
||||
func (s *Size) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
// Try to unmarshal as int64 first
|
||||
var intVal int64
|
||||
if err := unmarshal(&intVal); err == nil {
|
||||
*s = Size(intVal)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Try to unmarshal as string
|
||||
var strVal string
|
||||
if err := unmarshal(&strVal); err != nil {
|
||||
return fmt.Errorf("size must be a number or string")
|
||||
}
|
||||
|
||||
// Parse the string using go-humanize
|
||||
bytes, err := humanize.ParseBytes(strVal)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid size format: %w", err)
|
||||
}
|
||||
|
||||
*s = Size(bytes)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Int64 returns the size as int64
|
||||
func (s Size) Int64() int64 {
|
||||
return int64(s)
|
||||
}
|
||||
|
||||
// String returns the size as a human-readable string
|
||||
func (s Size) String() string {
|
||||
return humanize.Bytes(uint64(s))
|
||||
}
|
||||
Reference in New Issue
Block a user