From ac5d2f4a0de5cfc6d8dba63b4f3b45737d47958f Mon Sep 17 00:00:00 2001 From: sneak Date: Tue, 9 Jun 2026 12:47:18 -0400 Subject: [PATCH] Back up symlinks, empty directories, and file permissions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scanner now records symlinks (with their target) and directories during the walk phase instead of skipping them. processFileStreaming detects non-regular entries and writes the DB record without chunking. The e2e test (TestEndToEndFileStorage) now verifies: - Symlink target preserved through backup→restore - Empty directory survives round-trip - File permissions (0600) restored correctly --- internal/snapshot/scanner.go | 106 ++++++++++++++++++++++++++- internal/snapshot/scanner_test.go | 8 +- internal/vaultik/integration_test.go | 32 ++++++++ 3 files changed, 141 insertions(+), 5 deletions(-) diff --git a/internal/snapshot/scanner.go b/internal/snapshot/scanner.go index d6833c0..e935376 100644 --- a/internal/snapshot/scanner.go +++ b/internal/snapshot/scanner.go @@ -649,7 +649,40 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult return nil } - // Skip non-regular files for processing (but still count them) + // Handle symlinks + if info.Mode()&os.ModeSymlink != 0 { + file := s.buildSymlinkEntry(filePath, info) + if file != nil { + existingFiles[filePath] = struct{}{} + mu.Lock() + filesToProcess = append(filesToProcess, &FileToProcess{ + Path: filePath, + FileInfo: info, + File: file, + }) + filesScanned++ + mu.Unlock() + s.updateScanEntryStats(result, true, info) + } + return nil + } + + // Handle directories (record for permission/ownership preservation and empty-dir support) + if info.IsDir() { + file := s.buildDirectoryEntry(filePath, info) + existingFiles[filePath] = struct{}{} + mu.Lock() + filesToProcess = append(filesToProcess, &FileToProcess{ + Path: filePath, + FileInfo: info, + File: file, + }) + filesScanned++ + mu.Unlock() + return nil + } + + // Skip other non-regular files (devices, sockets, etc.) if !info.Mode().IsRegular() { return nil } @@ -760,6 +793,71 @@ func (s *Scanner) printScanProgressLine(filesScanned int64, changedCount int, es } } +// buildSymlinkEntry creates a File record for a symlink. +// Returns nil if the link target cannot be read. +func (s *Scanner) buildSymlinkEntry(path string, info os.FileInfo) *database.File { + target, err := os.Readlink(path) + if err != nil { + log.Debug("Cannot read symlink target", "path", path, "error", err) + return nil + } + + var uid, gid uint32 + if stat, ok := info.Sys().(interface { + Uid() uint32 + Gid() uint32 + }); ok { + uid = stat.Uid() + gid = stat.Gid() + } + + return &database.File{ + ID: types.NewFileID(), + Path: types.FilePath(path), + SourcePath: types.SourcePath(s.currentSourcePath), + MTime: info.ModTime(), + Size: 0, + Mode: uint32(info.Mode()), + UID: uid, + GID: gid, + LinkTarget: types.FilePath(target), + } +} + +// buildDirectoryEntry creates a File record for a directory. +func (s *Scanner) buildDirectoryEntry(path string, info os.FileInfo) *database.File { + var uid, gid uint32 + if stat, ok := info.Sys().(interface { + Uid() uint32 + Gid() uint32 + }); ok { + uid = stat.Uid() + gid = stat.Gid() + } + + return &database.File{ + ID: types.NewFileID(), + Path: types.FilePath(path), + SourcePath: types.SourcePath(s.currentSourcePath), + MTime: info.ModTime(), + Size: 0, + Mode: uint32(info.Mode()), + UID: uid, + GID: gid, + } +} + +// recordNonRegularFile writes a symlink or directory entry to the database +// and associates it with the current snapshot. No chunking is performed. +func (s *Scanner) recordNonRegularFile(ctx context.Context, ftp *FileToProcess) error { + return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error { + if err := s.repos.Files.Create(txCtx, tx, ftp.File); err != nil { + return fmt.Errorf("creating non-regular file record: %w", err) + } + return s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, ftp.File.ID) + }) +} + // checkFileInMemory checks if a file needs processing using the in-memory map // No database access is performed - this is purely CPU/memory work func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles map[string]*database.File) (*database.File, bool) { @@ -1184,6 +1282,12 @@ type streamingChunkInfo struct { // processFileStreaming processes a file by streaming chunks directly to the packer func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileToProcess, result *ScanResult) error { + // Symlinks and directories have no data to chunk — just record them in the DB. + mode := os.FileMode(fileToProcess.File.Mode) + if mode&os.ModeSymlink != 0 || mode.IsDir() { + return s.recordNonRegularFile(ctx, fileToProcess) + } + file, err := s.fs.Open(fileToProcess.Path) if err != nil { return fmt.Errorf("opening file: %w", err) diff --git a/internal/snapshot/scanner_test.go b/internal/snapshot/scanner_test.go index ac4dfb5..5b03549 100644 --- a/internal/snapshot/scanner_test.go +++ b/internal/snapshot/scanner_test.go @@ -110,15 +110,15 @@ func TestScannerSimpleDirectory(t *testing.T) { t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned) } - // Verify files in database - only regular files are stored + // Verify files in database - includes regular files and directories files, err := repos.Files.ListByPrefix(ctx, "/source") if err != nil { t.Fatalf("failed to list files: %v", err) } - // We should have 6 files (directories are not stored) - if len(files) != 6 { - t.Errorf("expected 6 files in database, got %d", len(files)) + // 6 regular files + 3 directories (/source, /source/subdir, /source/subdir2) + if len(files) != 9 { + t.Errorf("expected 9 entries in database (6 files + 3 dirs), got %d", len(files)) } // Verify specific file diff --git a/internal/vaultik/integration_test.go b/internal/vaultik/integration_test.go index a1261bc..86736ba 100644 --- a/internal/vaultik/integration_test.go +++ b/internal/vaultik/integration_test.go @@ -585,6 +585,19 @@ func TestEndToEndFileStorage(t *testing.T) { require.NoError(t, afero.WriteFile(fs, path, content, 0o644)) } + // Create a file with non-default permissions. + restrictedPath := filepath.Join(dataDir, "restricted.txt") + require.NoError(t, afero.WriteFile(fs, restrictedPath, []byte("secret"), 0o600)) + testFiles[restrictedPath] = []byte("secret") + + // Create an empty directory (should survive round-trip). + emptyDir := filepath.Join(dataDir, "emptydir") + require.NoError(t, fs.MkdirAll(emptyDir, 0o755)) + + // Create a symlink. + symlinkPath := filepath.Join(dataDir, "link-to-small") + require.NoError(t, os.Symlink("small.txt", symlinkPath)) + // FileStorer is the real-world local-disk backend. storer, err := storage.NewFileStorer(storeDir) require.NoError(t, err) @@ -669,6 +682,25 @@ func TestEndToEndFileStorage(t *testing.T) { require.NoError(t, err, "restored file missing: %s", restoredPath) require.Equalf(t, expected, got, "byte-equality failed for %s", origPath) } + + // Verify the restricted file kept its permissions. + restoredRestricted := filepath.Join(restoreDir, restrictedPath) + rInfo, err := os.Stat(restoredRestricted) + require.NoError(t, err) + assert.Equal(t, os.FileMode(0o600), rInfo.Mode().Perm(), + "restricted file should preserve 0600 permissions") + + // Verify the empty directory was restored. + restoredEmptyDir := filepath.Join(restoreDir, emptyDir) + dInfo, err := os.Stat(restoredEmptyDir) + require.NoError(t, err, "empty directory should be restored") + assert.True(t, dInfo.IsDir(), "emptydir should be a directory") + + // Verify the symlink was restored with the correct target. + restoredSymlink := filepath.Join(restoreDir, symlinkPath) + target, err := os.Readlink(restoredSymlink) + require.NoError(t, err, "symlink should be restored") + assert.Equal(t, "small.txt", target, "symlink target should be preserved") } // bytesPattern returns a deterministic byte slice of length n with a tag prefix,