vaultik/internal/database/chunk_files.go

package database

import (
	"context"
	"database/sql"
	"fmt"

	"git.eeqj.de/sneak/vaultik/internal/types"
)
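
// ChunkFileRepository provides access to the chunk_files table, which maps
// chunk hashes to the files and byte ranges in which those chunks occur.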
type ChunkFileRepository struct {
	db *DB
}
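
// NewChunkFileRepository returns a ChunkFileRepository backed by the given
// database handle. A minimal usage sketch (ctx, db, and fileID are
// placeholders supplied by the caller):
//
//	repo := NewChunkFileRepository(db)
//	mappings, err := repo.GetByFileID(ctx, fileID)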
func NewChunkFileRepository(db *DB) *ChunkFileRepository {
	return &ChunkFileRepository{db: db}
}
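
// Create inserts a single chunk-to-file mapping. If tx is non-nil, the insert
// runs inside that transaction; an existing (chunk_hash, file_id) pair is
// left untouched.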
func (r *ChunkFileRepository) Create(ctx context.Context, tx *sql.Tx, cf *ChunkFile) error {
	query := `
		INSERT INTO chunk_files (chunk_hash, file_id, file_offset, length)
		VALUES (?, ?, ?, ?)
		ON CONFLICT(chunk_hash, file_id) DO NOTHING
	`
	var err error
	if tx != nil {
		_, err = tx.ExecContext(ctx, query, cf.ChunkHash.String(), cf.FileID.String(), cf.FileOffset, cf.Length)
	} else {
		_, err = r.db.ExecWithLog(ctx, query, cf.ChunkHash.String(), cf.FileID.String(), cf.FileOffset, cf.Length)
	}
	if err != nil {
		return fmt.Errorf("inserting chunk_file: %w", err)
	}
	return nil
}
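
// GetByChunkHash returns every file mapping that references the given chunk
// hash.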
func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash types.ChunkHash) ([]*ChunkFile, error) {
	query := `
		SELECT chunk_hash, file_id, file_offset, length
		FROM chunk_files
		WHERE chunk_hash = ?
	`
	rows, err := r.db.conn.QueryContext(ctx, query, chunkHash.String())
	if err != nil {
		return nil, fmt.Errorf("querying chunk files: %w", err)
	}
	defer CloseRows(rows)
	return r.scanChunkFiles(rows)
}
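
// GetByFilePath returns the chunk mappings for the file at the given path,
// resolved through a join against the files table.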
func (r *ChunkFileRepository) GetByFilePath(ctx context.Context, filePath string) ([]*ChunkFile, error) {
	query := `
		SELECT cf.chunk_hash, cf.file_id, cf.file_offset, cf.length
		FROM chunk_files cf
		JOIN files f ON cf.file_id = f.id
		WHERE f.path = ?
	`
	rows, err := r.db.conn.QueryContext(ctx, query, filePath)
	if err != nil {
		return nil, fmt.Errorf("querying chunk files: %w", err)
	}
	defer CloseRows(rows)
	return r.scanChunkFiles(rows)
}

// GetByFileID retrieves chunk files by file ID
func (r *ChunkFileRepository) GetByFileID(ctx context.Context, fileID types.FileID) ([]*ChunkFile, error) {
	query := `
		SELECT chunk_hash, file_id, file_offset, length
		FROM chunk_files
		WHERE file_id = ?
	`
	rows, err := r.db.conn.QueryContext(ctx, query, fileID.String())
	if err != nil {
		return nil, fmt.Errorf("querying chunk files: %w", err)
	}
	defer CloseRows(rows)
	return r.scanChunkFiles(rows)
}

// scanChunkFiles is a helper that scans chunk file rows
func (r *ChunkFileRepository) scanChunkFiles(rows *sql.Rows) ([]*ChunkFile, error) {
	var chunkFiles []*ChunkFile
	for rows.Next() {
		var cf ChunkFile
		var chunkHashStr, fileIDStr string
		err := rows.Scan(&chunkHashStr, &fileIDStr, &cf.FileOffset, &cf.Length)
		if err != nil {
			return nil, fmt.Errorf("scanning chunk file: %w", err)
		}
		cf.ChunkHash = types.ChunkHash(chunkHashStr)
		cf.FileID, err = types.ParseFileID(fileIDStr)
		if err != nil {
			return nil, fmt.Errorf("parsing file ID: %w", err)
		}
		chunkFiles = append(chunkFiles, &cf)
	}
	return chunkFiles, rows.Err()
}

// DeleteByFileID deletes all chunk_files entries for a given file ID
func (r *ChunkFileRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID types.FileID) error {
	query := `DELETE FROM chunk_files WHERE file_id = ?`
	var err error
	if tx != nil {
		_, err = tx.ExecContext(ctx, query, fileID.String())
	} else {
		_, err = r.db.ExecWithLog(ctx, query, fileID.String())
	}
	if err != nil {
		return fmt.Errorf("deleting chunk files: %w", err)
	}
	return nil
}

// DeleteByFileIDs deletes the chunk_files rows for multiple files, batching
// the IDs into IN (...) statements.
func (r *ChunkFileRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []types.FileID) error {
	if len(fileIDs) == 0 {
		return nil
	}
	// Batch at 500 placeholders per statement to stay under SQLite's default
	// host-parameter limit (SQLITE_MAX_VARIABLE_NUMBER is 999 in older builds).
	const batchSize = 500
	for i := 0; i < len(fileIDs); i += batchSize {
		end := i + batchSize
		if end > len(fileIDs) {
			end = len(fileIDs)
		}
		batch := fileIDs[i:end]
		query := "DELETE FROM chunk_files WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
		args := make([]interface{}, len(batch))
		for j, id := range batch {
			args[j] = id.String()
		}
		var err error
		if tx != nil {
			_, err = tx.ExecContext(ctx, query, args...)
		} else {
			_, err = r.db.ExecWithLog(ctx, query, args...)
		}
		if err != nil {
			return fmt.Errorf("batch deleting chunk_files: %w", err)
		}
	}
	return nil
}

// CreateBatch inserts multiple chunk_files efficiently using multi-row INSERT
// statements, batched so each statement stays under SQLite's parameter limit.
func (r *ChunkFileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, cfs []ChunkFile) error {
	if len(cfs) == 0 {
		return nil
	}
	// Each ChunkFile binds 4 parameters, so 200 rows per statement means at
	// most 800 placeholders, safely under SQLite's default limit of 999.
	const batchSize = 200
	for i := 0; i < len(cfs); i += batchSize {
		end := i + batchSize
		if end > len(cfs) {
			end = len(cfs)
		}
		batch := cfs[i:end]
		query := "INSERT INTO chunk_files (chunk_hash, file_id, file_offset, length) VALUES "
		args := make([]interface{}, 0, len(batch)*4)
		for j, cf := range batch {
			if j > 0 {
				query += ", "
			}
			query += "(?, ?, ?, ?)"
			args = append(args, cf.ChunkHash.String(), cf.FileID.String(), cf.FileOffset, cf.Length)
		}
		query += " ON CONFLICT(chunk_hash, file_id) DO NOTHING"
		var err error
		if tx != nil {
			_, err = tx.ExecContext(ctx, query, args...)
		} else {
			_, err = r.db.ExecWithLog(ctx, query, args...)
		}
		if err != nil {
			return fmt.Errorf("batch inserting chunk_files: %w", err)
		}
	}
	return nil
}