vaultik/internal/backup/scanner.go
sneak 26db096913 Move StartTime initialization to application startup hook
- Remove StartTime initialization from globals.New()
- Add setupGlobals function in app.go to set StartTime during fx OnStart
- Simplify globals package to be just a key/value store
- Remove fx dependencies from globals test
2025-07-20 12:05:24 +02:00

217 lines
4.4 KiB
Go

package backup
import (
"context"
"crypto/sha256"
"database/sql"
"encoding/hex"
"fmt"
"io"
"os"
"time"
"git.eeqj.de/sneak/vaultik/internal/database"
"github.com/spf13/afero"
)
// Scanner scans directories and populates the database with file and chunk information.
// It walks a tree on the configured filesystem, inserts one file record per
// non-directory entry, and splits non-empty regular files into chunks of at
// most chunkSize bytes.
type Scanner struct {
	// fs is the filesystem that is walked and whose files are opened for chunking.
	fs afero.Fs
	// chunkSize is the length in bytes of the read buffer used when chunking;
	// the last chunk of a file may be shorter.
	chunkSize int
	// repos supplies the transaction wrapper and the Files, Chunks, FileChunks
	// and ChunkFiles repositories that a scan writes to.
	repos *database.Repositories
}
// ScannerConfig contains configuration for the scanner.
// NewScanner copies each field into the resulting Scanner unchanged.
type ScannerConfig struct {
	// FS is the filesystem to scan.
	FS afero.Fs
	// ChunkSize is the chunk size in bytes used when splitting file contents.
	ChunkSize int
	// Repositories is the database access layer that scan results are written to.
	Repositories *database.Repositories
}
// ScanResult contains the results of a scan operation.
type ScanResult struct {
	// FilesScanned counts the file records inserted; every non-directory
	// entry visited by the walk is counted, not only regular files.
	FilesScanned int
	// BytesScanned is the sum of the sizes reported by the file info of
	// each counted entry.
	BytesScanned int64
	// StartTime is taken when Scan begins.
	StartTime time.Time
	// EndTime is taken after the scan has completed without error.
	EndTime time.Time
}
// NewScanner builds a Scanner from cfg. The filesystem, chunk size and
// repositories are copied across as given; no validation is performed here.
func NewScanner(cfg ScannerConfig) *Scanner {
	scanner := new(Scanner)
	scanner.fs = cfg.FS
	scanner.chunkSize = cfg.ChunkSize
	scanner.repos = cfg.Repositories
	return scanner
}
// Scan walks the tree rooted at path and records what it finds in the
// database. The whole walk runs inside the callback handed to repos.WithTx.
//
// On success it returns the scan statistics with StartTime and EndTime set.
// On failure it returns a nil result and the underlying error wrapped with
// a "scan failed" prefix.
func (s *Scanner) Scan(ctx context.Context, path string) (*ScanResult, error) {
	stats := new(ScanResult)
	stats.StartTime = time.Now()

	// The walk mutates stats in place while the transaction is open.
	walk := func(ctx context.Context, tx *sql.Tx) error {
		return s.scanDirectory(ctx, tx, path, stats)
	}
	if err := s.repos.WithTx(ctx, walk); err != nil {
		return nil, fmt.Errorf("scan failed: %w", err)
	}

	stats.EndTime = time.Now()
	return stats, nil
}
// scanDirectory walks the tree rooted at path on the scanner's filesystem and
// hands every non-directory entry to processFile, accumulating statistics in
// result. Walk errors and context cancellation stop the walk immediately;
// processFile errors are wrapped with the offending path.
func (s *Scanner) scanDirectory(ctx context.Context, tx *sql.Tx, path string, result *ScanResult) error {
	visit := func(entryPath string, entry os.FileInfo, walkErr error) error {
		// An error reported by the walker itself aborts the scan.
		if walkErr != nil {
			return walkErr
		}

		// Stop promptly once the context has been cancelled.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}

		// Directories carry no record of their own; only their contents do.
		if entry.IsDir() {
			return nil
		}

		if procErr := s.processFile(ctx, tx, entryPath, entry, result); procErr != nil {
			return fmt.Errorf("failed to process %s: %w", entryPath, procErr)
		}
		return nil
	}

	return afero.Walk(s.fs, path, visit)
}
// processFile inserts a file record for the entry at path, updates the scan
// counters in result, and, for non-empty regular files, records the file's
// chunks via processFileChunks. It returns the first repository or chunking
// error encountered.
func (s *Scanner) processFile(ctx context.Context, tx *sql.Tx, path string, info os.FileInfo, result *ScanResult) error {
	// Ownership is only picked up when the underlying stat value offers
	// Uid/Gid accessor methods; otherwise both IDs stay zero.
	// NOTE(review): the standard library's *syscall.Stat_t exposes Uid/Gid as
	// fields rather than methods, so this assertion presumably never matches
	// for plain OS files — confirm whether that is intended.
	type ownerInfo interface {
		Uid() uint32
		Gid() uint32
	}
	var uid, gid uint32
	if owner, hasOwner := info.Sys().(ownerInfo); hasOwner {
		uid, gid = owner.Uid(), owner.Gid()
	}

	mode := info.Mode()
	size := info.Size()
	modTime := info.ModTime()

	// For symlinks, resolve the target when the filesystem can read links.
	// A failed read is tolerated and leaves the target empty.
	var linkTarget string
	if mode&os.ModeSymlink != 0 {
		if reader, canRead := s.fs.(afero.LinkReader); canRead {
			linkTarget, _ = reader.ReadlinkIfPossible(path)
		}
	}

	record := &database.File{
		Path:       path,
		MTime:      modTime,
		CTime:      modTime, // afero doesn't provide ctime, so mtime stands in
		Size:       size,
		Mode:       uint32(mode),
		UID:        uid,
		GID:        gid,
		LinkTarget: linkTarget,
	}
	if err := s.repos.Files.Create(ctx, tx, record); err != nil {
		return err
	}

	result.FilesScanned++
	result.BytesScanned += size

	// Only regular files with content are chunked.
	if !mode.IsRegular() || size <= 0 {
		return nil
	}
	return s.processFileChunks(ctx, tx, path, result)
}
// processFileChunks reads the file at path in pieces of at most chunkSize
// bytes and, for every piece, records:
//   - a chunk keyed by the hex-encoded SHA-256 of its bytes,
//   - a file-to-chunk mapping carrying the chunk's index within the file,
//   - a chunk-to-file mapping carrying the chunk's byte offset and length.
//
// It returns an error if the chunk size is not positive, if the context is
// cancelled between chunks, if the file cannot be opened or read, or if a
// mapping cannot be stored. The result parameter is currently not modified.
func (s *Scanner) processFileChunks(ctx context.Context, tx *sql.Tx, path string, result *ScanResult) error {
	// A negative size would panic in make, and a zero size would read nothing
	// and silently record no chunks for a non-empty file; reject both.
	if s.chunkSize <= 0 {
		return fmt.Errorf("invalid chunk size %d: must be positive", s.chunkSize)
	}

	file, err := s.fs.Open(path)
	if err != nil {
		return err
	}
	defer func() {
		// NOTE(review): a close failure is escalated through database.Fatal
		// rather than returned; behavior kept as-is.
		if err := file.Close(); err != nil {
			database.Fatal("failed to close file %s: %v", path, err)
		}
	}()

	// Widen once so offset arithmetic is done in 64 bits; multiplying two
	// ints first can overflow on 32-bit platforms for offsets >= 2 GiB.
	chunkSize := int64(s.chunkSize)
	buffer := make([]byte, s.chunkSize)

	for sequence := 0; ; sequence++ {
		// Large files take many iterations; honor cancellation between chunks.
		if err := ctx.Err(); err != nil {
			return err
		}

		n, readErr := io.ReadFull(file, buffer)
		// io.EOF means nothing was read; io.ErrUnexpectedEOF means a short
		// final chunk. Both mark the end of the file, not a failure.
		atEOF := readErr == io.EOF || readErr == io.ErrUnexpectedEOF
		if readErr != nil && !atEOF {
			return readErr
		}
		if n == 0 {
			break
		}

		sum := sha256.Sum256(buffer[:n])
		hash := hex.EncodeToString(sum[:])

		chunk := &database.Chunk{
			ChunkHash: hash,
			SHA256:    hash, // Using same hash for now
			Size:      int64(n),
		}
		// Try to insert chunk (ignore duplicate errors).
		// NOTE(review): this discards every error from Create, not only
		// duplicate-key ones — confirm that is acceptable.
		_ = s.repos.Chunks.Create(ctx, tx, chunk)

		fileChunk := &database.FileChunk{
			Path:      path,
			ChunkHash: hash,
			Idx:       sequence,
		}
		if err := s.repos.FileChunks.Create(ctx, tx, fileChunk); err != nil {
			return err
		}

		chunkFile := &database.ChunkFile{
			ChunkHash:  hash,
			FilePath:   path,
			FileOffset: int64(sequence) * chunkSize,
			Length:     int64(n),
		}
		if err := s.repos.ChunkFiles.Create(ctx, tx, chunkFile); err != nil {
			return err
		}

		if atEOF {
			break
		}
	}
	return nil
}