Back up symlinks, empty directories, and file permissions

Scanner now records symlinks (with their target) and directories
during the walk phase instead of skipping them. processFileStreaming
detects these symlink and directory entries and writes their DB records
without chunking. Other non-regular files (devices, sockets, etc.) are
still skipped.

The e2e test (TestEndToEndFileStorage) now verifies:
- Symlink target preserved through backup→restore
- Empty directory survives round-trip
- File permissions (0600) restored correctly
This commit is contained in:
2026-06-09 12:47:18 -04:00
parent b250ddfa94
commit ac5d2f4a0d
3 changed files with 141 additions and 5 deletions

View File

@@ -649,7 +649,40 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
return nil
}
// Skip non-regular files for processing (but still count them)
// Handle symlinks
if info.Mode()&os.ModeSymlink != 0 {
file := s.buildSymlinkEntry(filePath, info)
if file != nil {
existingFiles[filePath] = struct{}{}
mu.Lock()
filesToProcess = append(filesToProcess, &FileToProcess{
Path: filePath,
FileInfo: info,
File: file,
})
filesScanned++
mu.Unlock()
s.updateScanEntryStats(result, true, info)
}
return nil
}
// Handle directories (record for permission/ownership preservation and empty-dir support)
if info.IsDir() {
file := s.buildDirectoryEntry(filePath, info)
existingFiles[filePath] = struct{}{}
mu.Lock()
filesToProcess = append(filesToProcess, &FileToProcess{
Path: filePath,
FileInfo: info,
File: file,
})
filesScanned++
mu.Unlock()
return nil
}
// Skip other non-regular files (devices, sockets, etc.)
if !info.Mode().IsRegular() {
return nil
}
@@ -760,6 +793,71 @@ func (s *Scanner) printScanProgressLine(filesScanned int64, changedCount int, es
}
}
// buildSymlinkEntry assembles the database.File record that describes the
// symlink located at path.
//
// The link target is read with os.Readlink and stored verbatim in LinkTarget.
// If the target cannot be read, the failure is logged at debug level and nil
// is returned, so callers must check for nil before using the result.
//
// Size is always recorded as 0. Mode stores the complete os.FileMode bits
// (type bits included), which is what lets later code recognise the record as
// a symlink from Mode alone.
func (s *Scanner) buildSymlinkEntry(path string, info os.FileInfo) *database.File {
	linkTarget, readErr := os.Readlink(path)
	if readErr != nil {
		log.Debug("Cannot read symlink target", "path", path, "error", readErr)
		return nil
	}

	// Ownership is only available when the platform-specific Sys() value
	// offers Uid()/Gid() accessor methods; otherwise both IDs stay 0.
	// NOTE(review): the standard library's *syscall.Stat_t exposes Uid/Gid as
	// fields rather than methods, so this assertion presumably only matches
	// custom FileInfo implementations — confirm that ownership is actually
	// captured on a real filesystem.
	type ownerIDs interface {
		Uid() uint32
		Gid() uint32
	}
	var ownerUID, ownerGID uint32
	if owner, ok := info.Sys().(ownerIDs); ok {
		ownerUID, ownerGID = owner.Uid(), owner.Gid()
	}

	entry := &database.File{
		ID:         types.NewFileID(),
		Path:       types.FilePath(path),
		SourcePath: types.SourcePath(s.currentSourcePath),
		MTime:      info.ModTime(),
		Size:       0,
		Mode:       uint32(info.Mode()),
		UID:        ownerUID,
		GID:        ownerGID,
		LinkTarget: types.FilePath(linkTarget),
	}
	return entry
}
// buildDirectoryEntry assembles the database.File record that describes the
// directory located at path. It never returns nil.
//
// Size is always recorded as 0 and no LinkTarget is set. Mode stores the
// complete os.FileMode bits (type bits included), which is what lets later
// code recognise the record as a directory from Mode alone.
func (s *Scanner) buildDirectoryEntry(path string, info os.FileInfo) *database.File {
	// Ownership is only available when the platform-specific Sys() value
	// offers Uid()/Gid() accessor methods; otherwise both IDs stay 0.
	// NOTE(review): the standard library's *syscall.Stat_t exposes Uid/Gid as
	// fields rather than methods, so this assertion presumably only matches
	// custom FileInfo implementations — confirm that ownership is actually
	// captured on a real filesystem.
	type ownerIDs interface {
		Uid() uint32
		Gid() uint32
	}
	var ownerUID, ownerGID uint32
	if owner, ok := info.Sys().(ownerIDs); ok {
		ownerUID, ownerGID = owner.Uid(), owner.Gid()
	}

	entry := &database.File{
		ID:         types.NewFileID(),
		Path:       types.FilePath(path),
		SourcePath: types.SourcePath(s.currentSourcePath),
		MTime:      info.ModTime(),
		Size:       0,
		Mode:       uint32(info.Mode()),
		UID:        ownerUID,
		GID:        ownerGID,
	}
	return entry
}
// recordNonRegularFile persists a symlink or directory entry and attaches it
// to the snapshot identified by s.snapshotID. No chunking is performed: the
// entry carries metadata only.
//
// Both steps (creating the file record, then linking it to the snapshot by
// ID) run inside the callback handed to s.repos.WithTx, so they share the
// same *sql.Tx.
//
// ftp.File must be non-nil. The returned error is wrapped with the step that
// failed; the underlying cause stays reachable through errors.Is/errors.As.
func (s *Scanner) recordNonRegularFile(ctx context.Context, ftp *FileToProcess) error {
	return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
		if err := s.repos.Files.Create(txCtx, tx, ftp.File); err != nil {
			return fmt.Errorf("creating non-regular file record: %w", err)
		}
		// Wrap this failure as well so the caller can tell which of the two
		// writes went wrong, matching the handling of the Create step above.
		if err := s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, ftp.File.ID); err != nil {
			return fmt.Errorf("adding non-regular file to snapshot: %w", err)
		}
		return nil
	})
}
// checkFileInMemory checks if a file needs processing using the in-memory map
// No database access is performed - this is purely CPU/memory work
func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles map[string]*database.File) (*database.File, bool) {
@@ -1184,6 +1282,12 @@ type streamingChunkInfo struct {
// processFileStreaming processes a file by streaming chunks directly to the packer
func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileToProcess, result *ScanResult) error {
// Symlinks and directories have no data to chunk — just record them in the DB.
mode := os.FileMode(fileToProcess.File.Mode)
if mode&os.ModeSymlink != 0 || mode.IsDir() {
return s.recordNonRegularFile(ctx, fileToProcess)
}
file, err := s.fs.Open(fileToProcess.Path)
if err != nil {
return fmt.Errorf("opening file: %w", err)

View File

@@ -110,15 +110,15 @@ func TestScannerSimpleDirectory(t *testing.T) {
t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
}
// Verify files in database - only regular files are stored
// Verify files in database - includes regular files and directories
files, err := repos.Files.ListByPrefix(ctx, "/source")
if err != nil {
t.Fatalf("failed to list files: %v", err)
}
// We should have 6 files (directories are not stored)
if len(files) != 6 {
t.Errorf("expected 6 files in database, got %d", len(files))
// 6 regular files + 3 directories (/source, /source/subdir, /source/subdir2)
if len(files) != 9 {
t.Errorf("expected 9 entries in database (6 files + 3 dirs), got %d", len(files))
}
// Verify specific file

View File

@@ -585,6 +585,19 @@ func TestEndToEndFileStorage(t *testing.T) {
require.NoError(t, afero.WriteFile(fs, path, content, 0o644))
}
// Create a file with non-default permissions.
restrictedPath := filepath.Join(dataDir, "restricted.txt")
require.NoError(t, afero.WriteFile(fs, restrictedPath, []byte("secret"), 0o600))
testFiles[restrictedPath] = []byte("secret")
// Create an empty directory (should survive round-trip).
emptyDir := filepath.Join(dataDir, "emptydir")
require.NoError(t, fs.MkdirAll(emptyDir, 0o755))
// Create a symlink.
symlinkPath := filepath.Join(dataDir, "link-to-small")
require.NoError(t, os.Symlink("small.txt", symlinkPath))
// FileStorer is the real-world local-disk backend.
storer, err := storage.NewFileStorer(storeDir)
require.NoError(t, err)
@@ -669,6 +682,25 @@ func TestEndToEndFileStorage(t *testing.T) {
require.NoError(t, err, "restored file missing: %s", restoredPath)
require.Equalf(t, expected, got, "byte-equality failed for %s", origPath)
}
// Verify the restricted file kept its permissions.
restoredRestricted := filepath.Join(restoreDir, restrictedPath)
rInfo, err := os.Stat(restoredRestricted)
require.NoError(t, err)
assert.Equal(t, os.FileMode(0o600), rInfo.Mode().Perm(),
"restricted file should preserve 0600 permissions")
// Verify the empty directory was restored.
restoredEmptyDir := filepath.Join(restoreDir, emptyDir)
dInfo, err := os.Stat(restoredEmptyDir)
require.NoError(t, err, "empty directory should be restored")
assert.True(t, dInfo.IsDir(), "emptydir should be a directory")
// Verify the symlink was restored with the correct target.
restoredSymlink := filepath.Join(restoreDir, symlinkPath)
target, err := os.Readlink(restoredSymlink)
require.NoError(t, err, "symlink should be restored")
assert.Equal(t, "small.txt", target, "symlink target should be preserved")
}
// bytesPattern returns a deterministic byte slice of length n with a tag prefix,