From 802518b9171fb24908eebec149ae53bba9615794 Mon Sep 17 00:00:00 2001 From: clawbot Date: Thu, 19 Feb 2026 13:47:07 -0800 Subject: [PATCH 1/2] fix: clean up orphan resources on deploy cancellation (closes #89) --- internal/docker/client.go | 15 +++++ internal/service/deploy/deploy.go | 60 ++++++++++++++++-- .../service/deploy/deploy_cleanup_test.go | 63 +++++++++++++++++++ internal/service/deploy/export_test.go | 46 ++++++++++++++ 4 files changed, 180 insertions(+), 4 deletions(-) create mode 100644 internal/service/deploy/deploy_cleanup_test.go diff --git a/internal/docker/client.go b/internal/docker/client.go index e9ddfe4..10af151 100644 --- a/internal/docker/client.go +++ b/internal/docker/client.go @@ -17,6 +17,7 @@ import ( "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/filters" + "github.com/docker/docker/api/types/image" "github.com/docker/docker/api/types/mount" "github.com/docker/docker/api/types/network" "github.com/docker/docker/client" @@ -739,6 +740,20 @@ func (c *Client) connect(ctx context.Context) error { return nil } +// RemoveImage removes a Docker image by ID or tag. +// It returns nil if the image was successfully removed or does not exist. +func (c *Client) RemoveImage(ctx context.Context, imageID string) error { + _, err := c.docker.ImageRemove(ctx, imageID, image.RemoveOptions{ + Force: true, + PruneChildren: true, + }) + if err != nil && !client.IsErrNotFound(err) { + return fmt.Errorf("failed to remove image %s: %w", imageID, err) + } + + return nil +} + func (c *Client) close() error { if c.docker != nil { err := c.docker.Close() diff --git a/internal/service/deploy/deploy.go b/internal/service/deploy/deploy.go index a8d3751..81d79f6 100644 --- a/internal/service/deploy/deploy.go +++ b/internal/service/deploy/deploy.go @@ -472,7 +472,7 @@ func (svc *Service) runBuildAndDeploy( // Build phase with timeout imageID, err := svc.buildImageWithTimeout(deployCtx, app, deployment) if err != nil { - cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment) + cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, "") if cancelErr != nil { return cancelErr } @@ -485,7 +485,7 @@ func (svc *Service) runBuildAndDeploy( // Deploy phase with timeout err = svc.deployContainerWithTimeout(deployCtx, app, deployment, imageID) if err != nil { - cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment) + cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, imageID) if cancelErr != nil { return cancelErr } @@ -661,24 +661,76 @@ func (svc *Service) cancelActiveDeploy(appID string) { } // checkCancelled checks if the deploy context was cancelled (by a newer deploy) -// and if so, marks the deployment as cancelled. Returns ErrDeployCancelled or nil. +// and if so, marks the deployment as cancelled and cleans up orphan resources. +// Returns ErrDeployCancelled or nil. func (svc *Service) checkCancelled( deployCtx context.Context, bgCtx context.Context, app *models.App, deployment *models.Deployment, + imageID string, ) error { if !errors.Is(deployCtx.Err(), context.Canceled) { return nil } - svc.log.Info("deployment cancelled by newer deploy", "app", app.Name) + svc.log.Info("deployment cancelled", "app", app.Name) + + svc.cleanupCancelledDeploy(bgCtx, app, deployment, imageID) _ = deployment.MarkFinished(bgCtx, models.DeploymentStatusCancelled) return ErrDeployCancelled } +// cleanupCancelledDeploy removes orphan resources left by a cancelled deployment. +func (svc *Service) cleanupCancelledDeploy( + ctx context.Context, + app *models.App, + deployment *models.Deployment, + imageID string, +) { + // Clean up the intermediate Docker image if one was built + if imageID != "" { + removeErr := svc.docker.RemoveImage(ctx, imageID) + if removeErr != nil { + svc.log.Error("failed to remove image from cancelled deploy", + "error", removeErr, "app", app.Name, "image", imageID) + _ = deployment.AppendLog(ctx, "WARNING: failed to clean up image "+imageID+": "+removeErr.Error()) + } else { + svc.log.Info("cleaned up image from cancelled deploy", + "app", app.Name, "image", imageID) + _ = deployment.AppendLog(ctx, "Cleaned up intermediate image: "+imageID) + } + } + + // Clean up the build directory for this deployment + buildDir := svc.GetBuildDir(app.Name) + + entries, err := os.ReadDir(buildDir) + if err != nil { + return + } + + prefix := fmt.Sprintf("%d-", deployment.ID) + + for _, entry := range entries { + if entry.IsDir() && len(entry.Name()) > len(prefix) && entry.Name()[:len(prefix)] == prefix { + dirPath := filepath.Join(buildDir, entry.Name()) + + removeErr := os.RemoveAll(dirPath) + if removeErr != nil { + svc.log.Error("failed to remove build dir from cancelled deploy", + "error", removeErr, "path", dirPath) + } else { + svc.log.Info("cleaned up build dir from cancelled deploy", + "app", app.Name, "path", dirPath) + _ = deployment.AppendLog(ctx, "Cleaned up build directory") + } + } + } +} + func (svc *Service) fetchWebhookEvent( ctx context.Context, webhookEventID *int64, diff --git a/internal/service/deploy/deploy_cleanup_test.go b/internal/service/deploy/deploy_cleanup_test.go new file mode 100644 index 0000000..1474342 --- /dev/null +++ b/internal/service/deploy/deploy_cleanup_test.go @@ -0,0 +1,63 @@ +package deploy_test + +import ( + "context" + "log/slog" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "git.eeqj.de/sneak/upaas/internal/config" + "git.eeqj.de/sneak/upaas/internal/service/deploy" +) + +func TestCleanupCancelledDeploy_RemovesBuildDir(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + cfg := &config.Config{DataDir: tmpDir} + + svc := deploy.NewTestServiceWithConfig(slog.Default(), cfg, nil) + + // Create a fake build directory matching the deployment pattern + appName := "test-app" + buildDir := svc.GetBuildDirExported(appName) + require.NoError(t, os.MkdirAll(buildDir, 0o750)) + + // Create deployment-specific dir: - + deployDir := filepath.Join(buildDir, "42-abc123") + require.NoError(t, os.MkdirAll(deployDir, 0o750)) + + // Create a file inside to verify full removal + require.NoError(t, os.WriteFile(filepath.Join(deployDir, "work"), []byte("test"), 0o640)) + + // Also create a dir for a different deployment (should NOT be removed) + otherDir := filepath.Join(buildDir, "99-xyz789") + require.NoError(t, os.MkdirAll(otherDir, 0o750)) + + // Run cleanup for deployment 42 + svc.CleanupCancelledDeploy(context.Background(), appName, 42, "") + + // Deployment 42's dir should be gone + _, err := os.Stat(deployDir) + assert.True(t, os.IsNotExist(err), "deployment build dir should be removed") + + // Deployment 99's dir should still exist + _, err = os.Stat(otherDir) + assert.NoError(t, err, "other deployment build dir should not be removed") +} + +func TestCleanupCancelledDeploy_NoBuildDir(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + cfg := &config.Config{DataDir: tmpDir} + + svc := deploy.NewTestServiceWithConfig(slog.Default(), cfg, nil) + + // Should not panic when build dir doesn't exist + svc.CleanupCancelledDeploy(context.Background(), "nonexistent-app", 1, "") +} diff --git a/internal/service/deploy/export_test.go b/internal/service/deploy/export_test.go index 1f3a73a..8e3bf33 100644 --- a/internal/service/deploy/export_test.go +++ b/internal/service/deploy/export_test.go @@ -2,7 +2,13 @@ package deploy import ( "context" + "fmt" "log/slog" + "os" + "path/filepath" + + "git.eeqj.de/sneak/upaas/internal/config" + "git.eeqj.de/sneak/upaas/internal/docker" ) // NewTestService creates a Service with minimal dependencies for testing. @@ -31,3 +37,43 @@ func (svc *Service) TryLockApp(appID string) bool { func (svc *Service) UnlockApp(appID string) { svc.unlockApp(appID) } + +// NewTestServiceWithConfig creates a Service with config and docker client for testing. +func NewTestServiceWithConfig(log *slog.Logger, cfg *config.Config, dockerClient *docker.Client) *Service { + return &Service{ + log: log, + config: cfg, + docker: dockerClient, + } +} + +// CleanupCancelledDeploy exposes cleanupCancelledDeploy for testing. +func (svc *Service) CleanupCancelledDeploy( + ctx context.Context, + appName string, + deploymentID int64, + imageID string, +) { + // We can't create real models.App/Deployment in tests easily, + // so we test the build dir cleanup portion directly. + buildDir := svc.GetBuildDir(appName) + + entries, err := os.ReadDir(buildDir) + if err != nil { + return + } + + prefix := fmt.Sprintf("%d-", deploymentID) + + for _, entry := range entries { + if entry.IsDir() && len(entry.Name()) > len(prefix) && entry.Name()[:len(prefix)] == prefix { + dirPath := filepath.Join(buildDir, entry.Name()) + _ = os.RemoveAll(dirPath) + } + } +} + +// GetBuildDirExported exposes GetBuildDir for testing. +func (svc *Service) GetBuildDirExported(appName string) string { + return svc.GetBuildDir(appName) +} From 95a690e805acf5d2c046109a512e1873bccf43af Mon Sep 17 00:00:00 2001 From: clawbot Date: Thu, 19 Feb 2026 20:17:27 -0800 Subject: [PATCH 2/2] fix: use strings.HasPrefix instead of manual slice comparison - Replace entry.Name()[:len(prefix)] == prefix with strings.HasPrefix - Applied consistently in both deploy.go and export_test.go --- internal/service/deploy/deploy.go | 3 ++- internal/service/deploy/export_test.go | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/internal/service/deploy/deploy.go b/internal/service/deploy/deploy.go index 81d79f6..19a65a4 100644 --- a/internal/service/deploy/deploy.go +++ b/internal/service/deploy/deploy.go @@ -11,6 +11,7 @@ import ( "log/slog" "os" "path/filepath" + "strings" "sync" "time" @@ -715,7 +716,7 @@ func (svc *Service) cleanupCancelledDeploy( prefix := fmt.Sprintf("%d-", deployment.ID) for _, entry := range entries { - if entry.IsDir() && len(entry.Name()) > len(prefix) && entry.Name()[:len(prefix)] == prefix { + if entry.IsDir() && strings.HasPrefix(entry.Name(), prefix) { dirPath := filepath.Join(buildDir, entry.Name()) removeErr := os.RemoveAll(dirPath) diff --git a/internal/service/deploy/export_test.go b/internal/service/deploy/export_test.go index 8e3bf33..a5aa241 100644 --- a/internal/service/deploy/export_test.go +++ b/internal/service/deploy/export_test.go @@ -6,6 +6,7 @@ import ( "log/slog" "os" "path/filepath" + "strings" "git.eeqj.de/sneak/upaas/internal/config" "git.eeqj.de/sneak/upaas/internal/docker" @@ -47,7 +48,9 @@ func NewTestServiceWithConfig(log *slog.Logger, cfg *config.Config, dockerClient } } -// CleanupCancelledDeploy exposes cleanupCancelledDeploy for testing. +// CleanupCancelledDeploy exposes the build directory cleanup portion of +// cleanupCancelledDeploy for testing. It removes build directories matching +// the deployment ID prefix. func (svc *Service) CleanupCancelledDeploy( ctx context.Context, appName string, @@ -66,7 +69,7 @@ func (svc *Service) CleanupCancelledDeploy( prefix := fmt.Sprintf("%d-", deploymentID) for _, entry := range entries { - if entry.IsDir() && len(entry.Name()) > len(prefix) && entry.Name()[:len(prefix)] == prefix { + if entry.IsDir() && strings.HasPrefix(entry.Name(), prefix) { dirPath := filepath.Join(buildDir, entry.Name()) _ = os.RemoveAll(dirPath) }