Refactor blob storage to use UUID primary keys and implement streaming chunking
- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
@@ -17,17 +17,23 @@ func NewSnapshotRepository(db *DB) *SnapshotRepository {
|
||||
|
||||
func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *Snapshot) error {
|
||||
query := `
|
||||
INSERT INTO snapshots (id, hostname, vaultik_version, created_ts, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
INSERT INTO snapshots (id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`
|
||||
|
||||
var completedAt *int64
|
||||
if snapshot.CompletedAt != nil {
|
||||
ts := snapshot.CompletedAt.Unix()
|
||||
completedAt = &ts
|
||||
}
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.CreatedTS.Unix(),
|
||||
snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
|
||||
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
|
||||
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.CreatedTS.Unix(),
|
||||
snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
|
||||
_, err = r.db.ExecWithLock(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
|
||||
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -70,19 +76,21 @@ func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snaps
|
||||
|
||||
func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*Snapshot, error) {
|
||||
query := `
|
||||
SELECT id, hostname, vaultik_version, created_ts, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
FROM snapshots
|
||||
WHERE id = ?
|
||||
`
|
||||
|
||||
var snapshot Snapshot
|
||||
var createdTSUnix int64
|
||||
var startedAtUnix int64
|
||||
var completedAtUnix *int64
|
||||
|
||||
err := r.db.conn.QueryRowContext(ctx, query, snapshotID).Scan(
|
||||
&snapshot.ID,
|
||||
&snapshot.Hostname,
|
||||
&snapshot.VaultikVersion,
|
||||
&createdTSUnix,
|
||||
&startedAtUnix,
|
||||
&completedAtUnix,
|
||||
&snapshot.FileCount,
|
||||
&snapshot.ChunkCount,
|
||||
&snapshot.BlobCount,
|
||||
@@ -98,16 +106,20 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
|
||||
return nil, fmt.Errorf("querying snapshot: %w", err)
|
||||
}
|
||||
|
||||
snapshot.CreatedTS = time.Unix(createdTSUnix, 0)
|
||||
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
|
||||
if completedAtUnix != nil {
|
||||
t := time.Unix(*completedAtUnix, 0)
|
||||
snapshot.CompletedAt = &t
|
||||
}
|
||||
|
||||
return &snapshot, nil
|
||||
}
|
||||
|
||||
func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snapshot, error) {
|
||||
query := `
|
||||
SELECT id, hostname, vaultik_version, created_ts, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
FROM snapshots
|
||||
ORDER BY created_ts DESC
|
||||
ORDER BY started_at DESC
|
||||
LIMIT ?
|
||||
`
|
||||
|
||||
@@ -120,13 +132,15 @@ func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snap
|
||||
var snapshots []*Snapshot
|
||||
for rows.Next() {
|
||||
var snapshot Snapshot
|
||||
var createdTSUnix int64
|
||||
var startedAtUnix int64
|
||||
var completedAtUnix *int64
|
||||
|
||||
err := rows.Scan(
|
||||
&snapshot.ID,
|
||||
&snapshot.Hostname,
|
||||
&snapshot.VaultikVersion,
|
||||
&createdTSUnix,
|
||||
&startedAtUnix,
|
||||
&completedAtUnix,
|
||||
&snapshot.FileCount,
|
||||
&snapshot.ChunkCount,
|
||||
&snapshot.BlobCount,
|
||||
@@ -138,7 +152,154 @@ func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snap
|
||||
return nil, fmt.Errorf("scanning snapshot: %w", err)
|
||||
}
|
||||
|
||||
snapshot.CreatedTS = time.Unix(createdTSUnix, 0)
|
||||
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
|
||||
if completedAtUnix != nil {
|
||||
t := time.Unix(*completedAtUnix, 0)
|
||||
snapshot.CompletedAt = &t
|
||||
}
|
||||
|
||||
snapshots = append(snapshots, &snapshot)
|
||||
}
|
||||
|
||||
return snapshots, rows.Err()
|
||||
}
|
||||
|
||||
// MarkComplete marks a snapshot as completed with the current timestamp
|
||||
func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snapshotID string) error {
|
||||
query := `
|
||||
UPDATE snapshots
|
||||
SET completed_at = ?
|
||||
WHERE id = ?
|
||||
`
|
||||
|
||||
completedAt := time.Now().Unix()
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, completedAt, snapshotID)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, completedAt, snapshotID)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("marking snapshot complete: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddFile adds a file to a snapshot
|
||||
func (r *SnapshotRepository) AddFile(ctx context.Context, tx *sql.Tx, snapshotID string, filePath string) error {
|
||||
query := `
|
||||
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_path)
|
||||
VALUES (?, ?)
|
||||
`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, snapshotID, filePath)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, snapshotID, filePath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("adding file to snapshot: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddBlob adds a blob to a snapshot
|
||||
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID string, blobHash string) error {
|
||||
query := `
|
||||
INSERT OR IGNORE INTO snapshot_blobs (snapshot_id, blob_id, blob_hash)
|
||||
VALUES (?, ?, ?)
|
||||
`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, snapshotID, blobID, blobHash)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, snapshotID, blobID, blobHash)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("adding blob to snapshot: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetBlobHashes returns all blob hashes for a snapshot
|
||||
func (r *SnapshotRepository) GetBlobHashes(ctx context.Context, snapshotID string) ([]string, error) {
|
||||
query := `
|
||||
SELECT sb.blob_hash
|
||||
FROM snapshot_blobs sb
|
||||
WHERE sb.snapshot_id = ?
|
||||
ORDER BY sb.blob_hash
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, snapshotID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying blob hashes: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var blobs []string
|
||||
for rows.Next() {
|
||||
var blobHash string
|
||||
if err := rows.Scan(&blobHash); err != nil {
|
||||
return nil, fmt.Errorf("scanning blob hash: %w", err)
|
||||
}
|
||||
blobs = append(blobs, blobHash)
|
||||
}
|
||||
|
||||
return blobs, rows.Err()
|
||||
}
|
||||
|
||||
// GetIncompleteSnapshots returns all snapshots that haven't been completed
|
||||
func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Snapshot, error) {
|
||||
query := `
|
||||
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
FROM snapshots
|
||||
WHERE completed_at IS NULL
|
||||
ORDER BY started_at DESC
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying incomplete snapshots: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var snapshots []*Snapshot
|
||||
for rows.Next() {
|
||||
var snapshot Snapshot
|
||||
var startedAtUnix int64
|
||||
var completedAtUnix *int64
|
||||
|
||||
err := rows.Scan(
|
||||
&snapshot.ID,
|
||||
&snapshot.Hostname,
|
||||
&snapshot.VaultikVersion,
|
||||
&startedAtUnix,
|
||||
&completedAtUnix,
|
||||
&snapshot.FileCount,
|
||||
&snapshot.ChunkCount,
|
||||
&snapshot.BlobCount,
|
||||
&snapshot.TotalSize,
|
||||
&snapshot.BlobSize,
|
||||
&snapshot.CompressionRatio,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning snapshot: %w", err)
|
||||
}
|
||||
|
||||
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
|
||||
if completedAtUnix != nil {
|
||||
t := time.Unix(*completedAtUnix, 0)
|
||||
snapshot.CompletedAt = &t
|
||||
}
|
||||
|
||||
snapshots = append(snapshots, &snapshot)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user