vaultik/internal/database/snapshots.go
sneak 86b533d6ee Refactor blob storage to use UUID primary keys and implement streaming chunking
- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
2025-07-22 07:43:39 +02:00

309 lines
7.7 KiB
Go

package database
import (
"context"
"database/sql"
"fmt"
"time"
)
// SnapshotRepository groups the queries that read and write snapshot rows
// and their file/blob association rows through a DB handle.
type SnapshotRepository struct {
	// db is the handle every method of the repository runs its statements on
	// when the caller does not supply a transaction.
	db *DB
}
// NewSnapshotRepository returns a SnapshotRepository that issues its
// statements against db.
func NewSnapshotRepository(db *DB) *SnapshotRepository {
	repo := new(SnapshotRepository)
	repo.db = db
	return repo
}
// Create inserts snapshot as a new row in the snapshots table.
//
// StartedAt is stored as Unix seconds; CompletedAt is stored as Unix seconds
// when set and as NULL when it is nil. If tx is non-nil the insert runs inside
// that transaction, otherwise it goes through the repository's DB via
// ExecWithLock. Any failure is returned wrapped with "inserting snapshot".
func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *Snapshot) error {
	const query = `
INSERT INTO snapshots (id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`

	// A nil pointer is bound for completed_at while the snapshot is unfinished.
	var completedUnix *int64
	if done := snapshot.CompletedAt; done != nil {
		secs := done.Unix()
		completedUnix = &secs
	}

	// Column order must match the INSERT column list above.
	args := []interface{}{
		snapshot.ID,
		snapshot.Hostname,
		snapshot.VaultikVersion,
		snapshot.StartedAt.Unix(),
		completedUnix,
		snapshot.FileCount,
		snapshot.ChunkCount,
		snapshot.BlobCount,
		snapshot.TotalSize,
		snapshot.BlobSize,
		snapshot.CompressionRatio,
	}

	var execErr error
	if tx == nil {
		_, execErr = r.db.ExecWithLock(ctx, query, args...)
	} else {
		_, execErr = tx.ExecContext(ctx, query, args...)
	}
	if execErr != nil {
		return fmt.Errorf("inserting snapshot: %w", execErr)
	}
	return nil
}
// UpdateCounts overwrites the count and size columns of the snapshot row
// identified by snapshotID.
//
// compression_ratio is derived here as blobSize/totalSize, falling back to
// 1.0 when totalSize is not positive. If tx is non-nil the update runs inside
// that transaction, otherwise it goes through the repository's DB via
// ExecWithLock. Any failure is returned wrapped with "updating snapshot".
func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snapshotID string, fileCount, chunkCount, blobCount, totalSize, blobSize int64) error {
	const query = `
UPDATE snapshots
SET file_count = ?,
chunk_count = ?,
blob_count = ?,
total_size = ?,
blob_size = ?,
compression_ratio = ?
WHERE id = ?
`

	// Avoid dividing by zero for an empty snapshot.
	var ratio float64
	if totalSize > 0 {
		ratio = float64(blobSize) / float64(totalSize)
	} else {
		ratio = 1.0
	}

	// Order follows the placeholders: the SET columns first, then the id.
	args := []interface{}{fileCount, chunkCount, blobCount, totalSize, blobSize, ratio, snapshotID}

	var execErr error
	if tx == nil {
		_, execErr = r.db.ExecWithLock(ctx, query, args...)
	} else {
		_, execErr = tx.ExecContext(ctx, query, args...)
	}
	if execErr != nil {
		return fmt.Errorf("updating snapshot: %w", execErr)
	}
	return nil
}
// GetByID loads the snapshot whose id equals snapshotID.
//
// It returns (nil, nil) when no such row exists. Other query or scan failures
// are returned wrapped with "querying snapshot". The started_at and
// completed_at columns are read as Unix seconds; a NULL completed_at leaves
// CompletedAt nil.
func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*Snapshot, error) {
	const query = `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
WHERE id = ?
`

	var (
		result    = new(Snapshot)
		startedAt int64
		finished  *int64 // stays nil when completed_at is NULL
	)

	row := r.db.conn.QueryRowContext(ctx, query, snapshotID)
	err := row.Scan(
		&result.ID,
		&result.Hostname,
		&result.VaultikVersion,
		&startedAt,
		&finished,
		&result.FileCount,
		&result.ChunkCount,
		&result.BlobCount,
		&result.TotalSize,
		&result.BlobSize,
		&result.CompressionRatio,
	)
	switch {
	case err == sql.ErrNoRows:
		// A missing snapshot is reported as a nil result, not as an error.
		return nil, nil
	case err != nil:
		return nil, fmt.Errorf("querying snapshot: %w", err)
	}

	result.StartedAt = time.Unix(startedAt, 0)
	if finished != nil {
		completed := time.Unix(*finished, 0)
		result.CompletedAt = &completed
	}
	return result, nil
}
// ListRecent returns snapshots ordered by started_at, newest first, with
// limit bound to the query's LIMIT clause.
//
// The started_at and completed_at columns are read as Unix seconds; a NULL
// completed_at leaves CompletedAt nil. A query failure is wrapped with
// "querying snapshots" and a row scan failure with "scanning snapshot". An
// error surfaced by rows.Err() after iteration is returned together with the
// rows collected so far.
func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snapshot, error) {
	const query = `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
ORDER BY started_at DESC
LIMIT ?
`

	rows, err := r.db.conn.QueryContext(ctx, query, limit)
	if err != nil {
		return nil, fmt.Errorf("querying snapshots: %w", err)
	}
	defer CloseRows(rows)

	var result []*Snapshot
	for rows.Next() {
		var (
			item      = new(Snapshot)
			startedAt int64
			finished  *int64 // stays nil when completed_at is NULL
		)
		if scanErr := rows.Scan(
			&item.ID,
			&item.Hostname,
			&item.VaultikVersion,
			&startedAt,
			&finished,
			&item.FileCount,
			&item.ChunkCount,
			&item.BlobCount,
			&item.TotalSize,
			&item.BlobSize,
			&item.CompressionRatio,
		); scanErr != nil {
			return nil, fmt.Errorf("scanning snapshot: %w", scanErr)
		}

		item.StartedAt = time.Unix(startedAt, 0)
		if finished != nil {
			completed := time.Unix(*finished, 0)
			item.CompletedAt = &completed
		}
		result = append(result, item)
	}
	return result, rows.Err()
}
// MarkComplete sets completed_at of the snapshot identified by snapshotID to
// the current time, stored as Unix seconds.
//
// If tx is non-nil the update runs inside that transaction, otherwise it goes
// through the repository's DB via ExecWithLock. Any failure is returned
// wrapped with "marking snapshot complete".
func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snapshotID string) error {
	const query = `
UPDATE snapshots
SET completed_at = ?
WHERE id = ?
`

	// Capture the timestamp once, before choosing the execution path.
	nowUnix := time.Now().Unix()

	var execErr error
	if tx == nil {
		_, execErr = r.db.ExecWithLock(ctx, query, nowUnix, snapshotID)
	} else {
		_, execErr = tx.ExecContext(ctx, query, nowUnix, snapshotID)
	}
	if execErr != nil {
		return fmt.Errorf("marking snapshot complete: %w", execErr)
	}
	return nil
}
// AddFile records filePath as belonging to the snapshot identified by
// snapshotID by inserting a (snapshot_id, file_path) row into snapshot_files.
//
// The statement is INSERT OR IGNORE, so an insert that conflicts with an
// existing row is skipped instead of failing. If tx is non-nil the insert runs
// inside that transaction, otherwise it goes through the repository's DB via
// ExecWithLock. Any failure is returned wrapped with "adding file to snapshot".
func (r *SnapshotRepository) AddFile(ctx context.Context, tx *sql.Tx, snapshotID string, filePath string) error {
	const query = `
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_path)
VALUES (?, ?)
`

	var execErr error
	if tx == nil {
		_, execErr = r.db.ExecWithLock(ctx, query, snapshotID, filePath)
	} else {
		_, execErr = tx.ExecContext(ctx, query, snapshotID, filePath)
	}
	if execErr != nil {
		return fmt.Errorf("adding file to snapshot: %w", execErr)
	}
	return nil
}
// AddBlob records a blob as belonging to the snapshot identified by
// snapshotID by inserting a (snapshot_id, blob_id, blob_hash) row into
// snapshot_blobs.
//
// The statement is INSERT OR IGNORE, so an insert that conflicts with an
// existing row is skipped instead of failing. If tx is non-nil the insert runs
// inside that transaction, otherwise it goes through the repository's DB via
// ExecWithLock. Any failure is returned wrapped with "adding blob to snapshot".
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID string, blobHash string) error {
	const query = `
INSERT OR IGNORE INTO snapshot_blobs (snapshot_id, blob_id, blob_hash)
VALUES (?, ?, ?)
`

	var execErr error
	if tx == nil {
		_, execErr = r.db.ExecWithLock(ctx, query, snapshotID, blobID, blobHash)
	} else {
		_, execErr = tx.ExecContext(ctx, query, snapshotID, blobID, blobHash)
	}
	if execErr != nil {
		return fmt.Errorf("adding blob to snapshot: %w", execErr)
	}
	return nil
}
// GetBlobHashes returns the blob_hash of every snapshot_blobs row linked to
// snapshotID, sorted by blob_hash.
//
// A query failure is wrapped with "querying blob hashes" and a row scan
// failure with "scanning blob hash". An error surfaced by rows.Err() after
// iteration is returned together with the hashes collected so far.
func (r *SnapshotRepository) GetBlobHashes(ctx context.Context, snapshotID string) ([]string, error) {
	const query = `
SELECT sb.blob_hash
FROM snapshot_blobs sb
WHERE sb.snapshot_id = ?
ORDER BY sb.blob_hash
`

	rows, err := r.db.conn.QueryContext(ctx, query, snapshotID)
	if err != nil {
		return nil, fmt.Errorf("querying blob hashes: %w", err)
	}
	defer CloseRows(rows)

	var hashes []string
	for rows.Next() {
		var hash string
		scanErr := rows.Scan(&hash)
		if scanErr != nil {
			return nil, fmt.Errorf("scanning blob hash: %w", scanErr)
		}
		hashes = append(hashes, hash)
	}
	return hashes, rows.Err()
}
// GetIncompleteSnapshots returns every snapshot whose completed_at column is
// NULL, ordered by started_at with the newest first.
//
// The started_at column is read as Unix seconds. A query failure is wrapped
// with "querying incomplete snapshots" and a row scan failure with "scanning
// snapshot". An error surfaced by rows.Err() after iteration is returned
// together with the rows collected so far.
func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Snapshot, error) {
	const query = `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
WHERE completed_at IS NULL
ORDER BY started_at DESC
`

	rows, err := r.db.conn.QueryContext(ctx, query)
	if err != nil {
		return nil, fmt.Errorf("querying incomplete snapshots: %w", err)
	}
	defer CloseRows(rows)

	var pending []*Snapshot
	for rows.Next() {
		var (
			entry     = new(Snapshot)
			startedAt int64
			finished  *int64 // scanned like the other readers; set only if the column is non-NULL
		)
		if scanErr := rows.Scan(
			&entry.ID,
			&entry.Hostname,
			&entry.VaultikVersion,
			&startedAt,
			&finished,
			&entry.FileCount,
			&entry.ChunkCount,
			&entry.BlobCount,
			&entry.TotalSize,
			&entry.BlobSize,
			&entry.CompressionRatio,
		); scanErr != nil {
			return nil, fmt.Errorf("scanning snapshot: %w", scanErr)
		}

		entry.StartedAt = time.Unix(startedAt, 0)
		if finished != nil {
			completed := time.Unix(*finished, 0)
			entry.CompletedAt = &completed
		}
		pending = append(pending, entry)
	}
	return pending, rows.Err()
}