Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions

45
internal/config/size.go Normal file
View File

@@ -0,0 +1,45 @@
package config
import (
"fmt"
"github.com/dustin/go-humanize"
)
// Size is a byte count stored as an int64. Its UnmarshalYAML method lets a
// YAML value be given either as a plain integer or as a human-readable
// string that is parsed with go-humanize.
type Size int64
// UnmarshalYAML implements yaml.Unmarshaler for Size.
//
// The value may be given either as a plain integer (taken as-is) or as a
// string that humanize.ParseBytes understands. A string whose parsed value
// does not fit in an int64 is rejected rather than silently wrapping around
// to a negative size.
func (s *Size) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// Largest value a Size can hold (equal to math.MaxInt64), written as a
	// constant expression so that no additional import is required.
	const maxSize = uint64(1<<63 - 1)

	// Try the plain integer form first.
	var intVal int64
	if err := unmarshal(&intVal); err == nil {
		*s = Size(intVal)
		return nil
	}

	// Fall back to the string form.
	var strVal string
	if err := unmarshal(&strVal); err != nil {
		return fmt.Errorf("size must be a number or string: %w", err)
	}

	// Parse the string using go-humanize.
	parsed, err := humanize.ParseBytes(strVal)
	if err != nil {
		return fmt.Errorf("invalid size format: %w", err)
	}

	// ParseBytes yields a uint64; anything above maxSize would become
	// negative when converted to the int64-backed Size.
	if parsed > maxSize {
		return fmt.Errorf("size %q overflows int64", strVal)
	}

	*s = Size(parsed)
	return nil
}
// Int64 exposes the underlying value of the size as a plain int64.
func (s Size) Int64() int64 {
	n := int64(s)
	return n
}
// String returns the size as a human-readable string produced by
// humanize.Bytes.
//
// A negative size is rendered as a minus sign followed by the formatted
// magnitude; converting it straight to uint64 would instead wrap around and
// print an enormous, misleading value.
func (s Size) String() string {
	if s < 0 {
		// uint64(-s) is the magnitude of s. This also holds for the minimum
		// int64, where the negation wraps back to the same bit pattern and
		// the conversion yields 1<<63.
		return "-" + humanize.Bytes(uint64(-s))
	}
	return humanize.Bytes(uint64(s))
}