package backup

import (
	"context"
	"crypto/sha256"
	"database/sql"
	"encoding/hex"
	"fmt"
	"io"
	"os"
	"time"

	"git.eeqj.de/sneak/vaultik/internal/database"
	"github.com/spf13/afero"
)

// Scanner walks a directory tree on an afero filesystem and records every
// file it finds, plus the fixed-size chunks of each regular file, in the
// database.
type Scanner struct {
	fs        afero.Fs               // filesystem that is walked and read
	chunkSize int                    // size in bytes of each chunk read from a file
	repos     *database.Repositories // database repositories written to during a scan
}

// ScannerConfig contains the configuration for a Scanner.
type ScannerConfig struct {
	FS           afero.Fs
	ChunkSize    int
	Repositories *database.Repositories
}

// ScanResult contains the results of a scan operation.
type ScanResult struct {
	FilesScanned int       // number of non-directory entries recorded
	BytesScanned int64     // sum of the reported sizes of those entries
	StartTime    time.Time // wall-clock time at which Scan was entered
	EndTime      time.Time // wall-clock time at which the scan completed
}

// NewScanner creates a new Scanner from cfg. The configuration values are
// copied as-is; nothing is validated here.
func NewScanner(cfg ScannerConfig) *Scanner {
	return &Scanner{
		fs:        cfg.FS,
		chunkSize: cfg.ChunkSize,
		repos:     cfg.Repositories,
	}
}

// Scan walks the tree rooted at path and populates the database with file
// and chunk records. The whole walk runs inside a single transaction obtained
// from Repositories.WithTx. On failure the partial result is discarded and a
// wrapped error is returned.
func (s *Scanner) Scan(ctx context.Context, path string) (*ScanResult, error) {
	result := &ScanResult{
		StartTime: time.Now(),
	}

	err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return s.scanDirectory(ctx, tx, path, result)
	})
	if err != nil {
		return nil, fmt.Errorf("scan failed: %w", err)
	}

	result.EndTime = time.Now()
	return result, nil
}

// scanDirectory walks root and calls processFile for every entry that is not
// a directory. The walk stops at the first error, including cancellation of
// ctx, which is checked once per visited entry.
func (s *Scanner) scanDirectory(ctx context.Context, tx *sql.Tx, path string, result *ScanResult) error {
	return afero.Walk(s.fs, path, func(entry string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}

		// Stop promptly when the caller cancelled the scan.
		if err := ctx.Err(); err != nil {
			return err
		}

		// Directories themselves are not recorded, only their contents.
		if info.IsDir() {
			return nil
		}

		if err := s.processFile(ctx, tx, entry, info, result); err != nil {
			return fmt.Errorf("failed to process %s: %w", entry, err)
		}
		return nil
	})
}

// processFile inserts the file record for path, updates the counters in
// result, and, for non-empty regular files, records the file's chunks.
func (s *Scanner) processFile(ctx context.Context, tx *sql.Tx, path string, info os.FileInfo, result *ScanResult) error {
	// Ownership is only available when the underlying stat value exposes
	// Uid/Gid accessors; otherwise both stay zero.
	// NOTE(review): on Unix, os.FileInfo.Sys() normally returns
	// *syscall.Stat_t, which exposes Uid and Gid as fields rather than
	// methods, so this assertion probably never succeeds there — confirm.
	var uid, gid uint32
	if stat, ok := info.Sys().(interface {
		Uid() uint32
		Gid() uint32
	}); ok {
		uid = stat.Uid()
		gid = stat.Gid()
	}

	// Resolve the symlink target when the filesystem is able to.
	var linkTarget string
	if info.Mode()&os.ModeSymlink != 0 {
		if linker, ok := s.fs.(afero.LinkReader); ok {
			// Best effort: a link whose target cannot be read is still
			// recorded, just with an empty target.
			linkTarget, _ = linker.ReadlinkIfPossible(path)
		}
	}

	file := &database.File{
		Path:       path,
		MTime:      info.ModTime(),
		CTime:      info.ModTime(), // afero doesn't provide ctime
		Size:       info.Size(),
		Mode:       uint32(info.Mode()),
		UID:        uid,
		GID:        gid,
		LinkTarget: linkTarget,
	}
	if err := s.repos.Files.Create(ctx, tx, file); err != nil {
		return err
	}

	result.FilesScanned++
	result.BytesScanned += info.Size()

	// Only regular files with content have chunks.
	if info.Mode().IsRegular() && info.Size() > 0 {
		if err := s.processFileChunks(ctx, tx, path, result); err != nil {
			return err
		}
	}
	return nil
}

// processFileChunks reads the file at path in chunkSize-byte pieces and, for
// each piece, records the chunk, the file-to-chunk mapping and the
// chunk-to-file mapping. Every chunk except possibly the last is exactly
// chunkSize bytes long.
//
// It returns an error if the configured chunk size is not positive, if the
// file cannot be opened or read, if ctx is cancelled, or if one of the
// mapping inserts fails.
func (s *Scanner) processFileChunks(ctx context.Context, tx *sql.Tx, path string, result *ScanResult) error {
	// A zero chunk size would make every read return zero bytes, silently
	// recording the file without any chunks; a negative one would panic in
	// make. Reject both explicitly.
	if s.chunkSize <= 0 {
		return fmt.Errorf("invalid chunk size %d", s.chunkSize)
	}

	file, err := s.fs.Open(path)
	if err != nil {
		return err
	}
	defer func() {
		// NOTE(review): a close failure is escalated through database.Fatal
		// rather than returned; kept as in the original — confirm intended.
		if closeErr := file.Close(); closeErr != nil {
			database.Fatal("failed to close file %s: %v", path, closeErr)
		}
	}()

	buffer := make([]byte, s.chunkSize)

	// offset is tracked in int64 so that files larger than 2 GiB do not
	// overflow on platforms where int is 32 bits wide.
	var offset int64

	for idx := 0; ; idx++ {
		// Large files can take a while; honour cancellation between chunks.
		if err := ctx.Err(); err != nil {
			return err
		}

		n, readErr := io.ReadFull(file, buffer)
		if readErr != nil && readErr != io.EOF && readErr != io.ErrUnexpectedEOF {
			return readErr
		}
		if n == 0 {
			break
		}

		sum := sha256.Sum256(buffer[:n])
		hash := hex.EncodeToString(sum[:])

		chunk := &database.Chunk{
			ChunkHash: hash,
			SHA256:    hash, // Using same hash for now
			Size:      int64(n),
		}
		// The error is deliberately ignored so that a chunk that is already
		// recorded does not abort the scan.
		// NOTE(review): this also hides genuine insert failures — consider
		// distinguishing duplicate-key errors once the repository exposes them.
		_ = s.repos.Chunks.Create(ctx, tx, chunk)

		fileChunk := &database.FileChunk{
			Path:      path,
			ChunkHash: hash,
			Idx:       idx,
		}
		if err := s.repos.FileChunks.Create(ctx, tx, fileChunk); err != nil {
			return err
		}

		chunkFile := &database.ChunkFile{
			ChunkHash:  hash,
			FilePath:   path,
			FileOffset: offset,
			Length:     int64(n),
		}
		if err := s.repos.ChunkFiles.Create(ctx, tx, chunkFile); err != nil {
			return err
		}

		offset += int64(n)

		// A short read means the chunk just recorded was the final one.
		if readErr != nil {
			break
		}
	}
	return nil
}