package database

import (
	"context"
	"database/sql"
	"fmt"

	"git.eeqj.de/sneak/vaultik/internal/types"
)

// FileChunkRepository provides access to the file_chunks table, which maps
// each file (by its ID) to the ordered list of chunk hashes that make up
// the file's content.
type FileChunkRepository struct {
	db *DB
}

// NewFileChunkRepository returns a repository backed by the given database
// handle.
func NewFileChunkRepository(db *DB) *FileChunkRepository {
	return &FileChunkRepository{db: db}
}

// Create inserts a single file-to-chunk mapping. When tx is non-nil the
// insert runs inside that transaction; otherwise it executes directly on
// the repository's database via ExecWithLog. A duplicate (file_id, idx)
// pair is silently ignored via ON CONFLICT ... DO NOTHING.
func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileChunk) error {
	query := ` INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES (?, ?, ?) ON CONFLICT(file_id, idx) DO NOTHING `
	var err error
	if tx != nil {
		_, err = tx.ExecContext(ctx, query, fc.FileID.String(), fc.Idx, fc.ChunkHash.String())
	} else {
		_, err = r.db.ExecWithLog(ctx, query, fc.FileID.String(), fc.Idx, fc.ChunkHash.String())
	}
	if err != nil {
		return fmt.Errorf("inserting file_chunk: %w", err)
	}
	return nil
}

// GetByPath returns the chunk mappings for the file stored at path,
// ordered by chunk index. It queries the plain connection (no transaction).
func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*FileChunk, error) {
	query := ` SELECT fc.file_id, fc.idx, fc.chunk_hash FROM file_chunks fc JOIN files f ON fc.file_id = f.id WHERE f.path = ? ORDER BY fc.idx `
	rows, err := r.db.conn.QueryContext(ctx, query, path)
	if err != nil {
		return nil, fmt.Errorf("querying file chunks: %w", err)
	}
	defer CloseRows(rows)
	return r.scanFileChunks(rows)
}

// GetByFileID retrieves file chunks by file ID, ordered by chunk index.
func (r *FileChunkRepository) GetByFileID(ctx context.Context, fileID types.FileID) ([]*FileChunk, error) {
	query := ` SELECT file_id, idx, chunk_hash FROM file_chunks WHERE file_id = ? ORDER BY idx `
	rows, err := r.db.conn.QueryContext(ctx, query, fileID.String())
	if err != nil {
		return nil, fmt.Errorf("querying file chunks: %w", err)
	}
	defer CloseRows(rows)
	return r.scanFileChunks(rows)
}

// GetByPathTx retrieves file chunks within a transaction.
// NOTE(review): unlike Create/DeleteByPath there is no nil-tx fallback
// here — tx must be non-nil or the query call will panic.
func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
	query := ` SELECT fc.file_id, fc.idx, fc.chunk_hash FROM file_chunks fc JOIN files f ON fc.file_id = f.id WHERE f.path = ? 
ORDER BY fc.idx `
	LogSQL("GetByPathTx", query, path)
	rows, err := tx.QueryContext(ctx, query, path)
	if err != nil {
		return nil, fmt.Errorf("querying file chunks: %w", err)
	}
	defer CloseRows(rows)
	fileChunks, err := r.scanFileChunks(rows)
	LogSQL("GetByPathTx", "Complete", path, "count", len(fileChunks))
	return fileChunks, err
}

// scanFileChunks is a helper that scans file chunk rows.
// Each row's file_id and chunk_hash columns are scanned as strings and
// converted to their typed forms; any scan, parse, or row-iteration error
// aborts the whole result.
func (r *FileChunkRepository) scanFileChunks(rows *sql.Rows) ([]*FileChunk, error) {
	var fileChunks []*FileChunk
	for rows.Next() {
		var fc FileChunk
		var fileIDStr, chunkHashStr string
		err := rows.Scan(&fileIDStr, &fc.Idx, &chunkHashStr)
		if err != nil {
			return nil, fmt.Errorf("scanning file chunk: %w", err)
		}
		fc.FileID, err = types.ParseFileID(fileIDStr)
		if err != nil {
			return nil, fmt.Errorf("parsing file ID: %w", err)
		}
		fc.ChunkHash = types.ChunkHash(chunkHashStr)
		fileChunks = append(fileChunks, &fc)
	}
	// rows.Err surfaces any iteration error that ended the loop early.
	return fileChunks, rows.Err()
}

// DeleteByPath removes every chunk mapping for the file stored at path.
// When tx is non-nil the delete runs inside that transaction; otherwise it
// executes directly on the repository's database.
func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path string) error {
	query := `DELETE FROM file_chunks WHERE file_id = (SELECT id FROM files WHERE path = ?)`
	var err error
	if tx != nil {
		_, err = tx.ExecContext(ctx, query, path)
	} else {
		_, err = r.db.ExecWithLog(ctx, query, path)
	}
	if err != nil {
		return fmt.Errorf("deleting file chunks: %w", err)
	}
	return nil
}

// DeleteByFileID deletes all chunks for a file by its UUID.
// When tx is non-nil the delete runs inside that transaction.
func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID types.FileID) error {
	query := `DELETE FROM file_chunks WHERE file_id = ?`
	var err error
	if tx != nil {
		_, err = tx.ExecContext(ctx, query, fileID.String())
	} else {
		_, err = r.db.ExecWithLog(ctx, query, fileID.String())
	}
	if err != nil {
		return fmt.Errorf("deleting file chunks: %w", err)
	}
	return nil
}

// DeleteByFileIDs deletes all chunks for multiple files in a single statement.
func (r *FileChunkRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []types.FileID) error { if len(fileIDs) == 0 { return nil } // Batch at 500 to stay within SQLite's variable limit const batchSize = 500 for i := 0; i < len(fileIDs); i += batchSize { end := i + batchSize if end > len(fileIDs) { end = len(fileIDs) } batch := fileIDs[i:end] query := "DELETE FROM file_chunks WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")" args := make([]interface{}, len(batch)) for j, id := range batch { args[j] = id.String() } var err error if tx != nil { _, err = tx.ExecContext(ctx, query, args...) } else { _, err = r.db.ExecWithLog(ctx, query, args...) } if err != nil { return fmt.Errorf("batch deleting file_chunks: %w", err) } } return nil } // CreateBatch inserts multiple file_chunks in a single statement for efficiency. // Batches are automatically split to stay within SQLite's variable limit. func (r *FileChunkRepository) CreateBatch(ctx context.Context, tx *sql.Tx, fcs []FileChunk) error { if len(fcs) == 0 { return nil } // SQLite has a limit on variables (typically 999 or 32766). // Each FileChunk has 3 values, so batch at 300 to be safe. const batchSize = 300 for i := 0; i < len(fcs); i += batchSize { end := i + batchSize if end > len(fcs) { end = len(fcs) } batch := fcs[i:end] // Build the query with multiple value sets query := "INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES " args := make([]interface{}, 0, len(batch)*3) for j, fc := range batch { if j > 0 { query += ", " } query += "(?, ?, ?)" args = append(args, fc.FileID.String(), fc.Idx, fc.ChunkHash.String()) } query += " ON CONFLICT(file_id, idx) DO NOTHING" var err error if tx != nil { _, err = tx.ExecContext(ctx, query, args...) } else { _, err = r.db.ExecWithLog(ctx, query, args...) 
} if err != nil { return fmt.Errorf("batch inserting file_chunks: %w", err) } } return nil } // GetByFile is an alias for GetByPath for compatibility func (r *FileChunkRepository) GetByFile(ctx context.Context, path string) ([]*FileChunk, error) { LogSQL("GetByFile", "Starting", path) result, err := r.GetByPath(ctx, path) LogSQL("GetByFile", "Complete", path, "count", len(result)) return result, err } // GetByFileTx retrieves file chunks within a transaction func (r *FileChunkRepository) GetByFileTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) { LogSQL("GetByFileTx", "Starting", path) result, err := r.GetByPathTx(ctx, tx, path) LogSQL("GetByFileTx", "Complete", path, "count", len(result)) return result, err }