Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions
--- a/config.example.yml
+++ b/config.example.yml
@@ -0,0 +1,144 @@
+# vaultik configuration file example
+# This file shows all available configuration options with their default values
+# Copy this file and uncomment/modify the values you need
+
+# Age recipient public key for encryption
+# This is REQUIRED - backups are encrypted to this public key
+# Generate with: age-keygen -o key.txt (prints the public key; keep key.txt safe, it is needed to decrypt)
+age_recipient: age1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+# List of directories to backup
+# These paths will be scanned recursively for files to backup
+# Use absolute paths
+source_dirs:
+  - /
+  # - /home
+  # - /etc
+  # - /var
+
+# Patterns to exclude from backup
+# Uses glob patterns to match file paths
+# Paths are matched as absolute paths
+exclude:
+  # System directories that should not be backed up
+  - /proc
+  - /sys
+  - /dev
+  - /run
+  - /tmp
+  - /var/tmp
+  - /var/run
+  - /var/lock
+  - /var/cache
+  - /lost+found
+  - /media
+  - /mnt
+  # Swap files
+  - /swapfile
+  - /swap.img
+  - "*.swap"
+  - "*.swp"
+  # Log files (optional - you may want to keep some logs)
+  - "*.log"
+  - "*.log.*"
+  - /var/log
+  # Package manager caches
+  - /var/cache/apt
+  - /var/cache/yum
+  - /var/cache/dnf
+  - /var/cache/pacman
+  # User caches and temporary files
+  - "*/.cache"
+  - "*/.local/share/Trash"
+  - "*/Downloads"
+  - "*/.thumbnails"
+  # Development artifacts (optional - without .git/objects, restored Git repositories are unusable)
+  - "**/node_modules"
+  - "**/.git/objects"
+  - "**/target"
+  - "**/build"
+  - "**/__pycache__"
+  - "**/*.pyc"
+  # Large files you might not want to backup
+  - "*.iso"
+  - "*.img"
+  - "*.vmdk"
+  - "*.vdi"
+  - "*.qcow2"
+
+# S3-compatible storage configuration
+s3:
+  # S3-compatible endpoint URL
+  # Examples: https://s3.amazonaws.com, https://storage.googleapis.com
+  endpoint: https://s3.example.com
+  
+  # Bucket name where backups will be stored
+  bucket: my-backup-bucket
+  
+  # Prefix (folder) within the bucket for this host's backups
+  # Useful for organizing backups from multiple hosts
+  # Default: empty (root of bucket)
+  #prefix: "hosts/myserver/"
+  
+  # S3 access credentials
+  access_key_id: your-access-key
+  secret_access_key: your-secret-key
+  
+  # S3 region
+  # Default: us-east-1
+  #region: us-east-1
+  
+  # Use SSL/TLS for S3 connections
+  # Default: true
+  #use_ssl: true
+  
+  # Part size for multipart uploads
+  # Minimum 5MB, affects memory usage during upload
+  # Supports: 5MB, 10M, 100MiB, etc.
+  # Default: 5MB
+  #part_size: 5MB
+
+# How often to run backups in daemon mode
+# Format: 1h, 30m, 24h, etc.
+# Default: 1h
+#backup_interval: 1h
+
+# How often to do a full filesystem scan in daemon mode
+# Between full scans, inotify is used to detect changes
+# Default: 24h
+#full_scan_interval: 24h
+
+# Minimum time between backup runs in daemon mode
+# Prevents backups from running too frequently
+# Default: 15m
+#min_time_between_run: 15m
+
+# Path to local SQLite index database
+# This database tracks file state for incremental backups
+# Default: /var/lib/vaultik/index.sqlite
+#index_path: /var/lib/vaultik/index.sqlite
+
+# Prefix for index/metadata files in S3
+# Default: index/
+#index_prefix: index/
+
+# Average chunk size for content-defined chunking
+# Smaller chunks = better deduplication but more metadata
+# Supports: 10MB, 5M, 1GB, 500KB, 64MiB, etc.
+# Default: 10MB
+#chunk_size: 10MB
+
+# Maximum blob size
+# Multiple chunks are packed into blobs up to this size
+# Supports: 1GB, 10G, 500MB, 1GiB, etc.
+# Default: 10GB
+#blob_size_limit: 10GB
+
+# Compression level (1-19)
+# Higher = better compression but slower
+# Default: 3
+#compression_level: 3
+
+# Hostname to use in backup metadata
+# Default: system hostname
+#hostname: myserver