fix: clean up orphan resources on deploy cancellation (closes #89)
This commit is contained in:
parent
b47f871412
commit
729425132b
@ -17,6 +17,7 @@ import (
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/filters"
|
||||
"github.com/docker/docker/api/types/image"
|
||||
"github.com/docker/docker/api/types/mount"
|
||||
"github.com/docker/docker/api/types/network"
|
||||
"github.com/docker/docker/client"
|
||||
@ -739,6 +740,20 @@ func (c *Client) connect(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemoveImage removes a Docker image by ID or tag.
|
||||
// It returns nil if the image was successfully removed or does not exist.
|
||||
func (c *Client) RemoveImage(ctx context.Context, imageID string) error {
|
||||
_, err := c.docker.ImageRemove(ctx, imageID, image.RemoveOptions{
|
||||
Force: true,
|
||||
PruneChildren: true,
|
||||
})
|
||||
if err != nil && !client.IsErrNotFound(err) {
|
||||
return fmt.Errorf("failed to remove image %s: %w", imageID, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Client) close() error {
|
||||
if c.docker != nil {
|
||||
err := c.docker.Close()
|
||||
|
||||
@ -472,7 +472,7 @@ func (svc *Service) runBuildAndDeploy(
|
||||
// Build phase with timeout
|
||||
imageID, err := svc.buildImageWithTimeout(deployCtx, app, deployment)
|
||||
if err != nil {
|
||||
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment)
|
||||
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, "")
|
||||
if cancelErr != nil {
|
||||
return cancelErr
|
||||
}
|
||||
@ -485,7 +485,7 @@ func (svc *Service) runBuildAndDeploy(
|
||||
// Deploy phase with timeout
|
||||
err = svc.deployContainerWithTimeout(deployCtx, app, deployment, imageID)
|
||||
if err != nil {
|
||||
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment)
|
||||
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, imageID)
|
||||
if cancelErr != nil {
|
||||
return cancelErr
|
||||
}
|
||||
@ -661,24 +661,76 @@ func (svc *Service) cancelActiveDeploy(appID string) {
|
||||
}
|
||||
|
||||
// checkCancelled checks if the deploy context was cancelled (by a newer deploy)
|
||||
// and if so, marks the deployment as cancelled. Returns ErrDeployCancelled or nil.
|
||||
// and if so, marks the deployment as cancelled and cleans up orphan resources.
|
||||
// Returns ErrDeployCancelled or nil.
|
||||
func (svc *Service) checkCancelled(
|
||||
deployCtx context.Context,
|
||||
bgCtx context.Context,
|
||||
app *models.App,
|
||||
deployment *models.Deployment,
|
||||
imageID string,
|
||||
) error {
|
||||
if !errors.Is(deployCtx.Err(), context.Canceled) {
|
||||
return nil
|
||||
}
|
||||
|
||||
svc.log.Info("deployment cancelled by newer deploy", "app", app.Name)
|
||||
svc.log.Info("deployment cancelled", "app", app.Name)
|
||||
|
||||
svc.cleanupCancelledDeploy(bgCtx, app, deployment, imageID)
|
||||
|
||||
_ = deployment.MarkFinished(bgCtx, models.DeploymentStatusCancelled)
|
||||
|
||||
return ErrDeployCancelled
|
||||
}
|
||||
|
||||
// cleanupCancelledDeploy removes orphan resources left by a cancelled deployment.
|
||||
func (svc *Service) cleanupCancelledDeploy(
|
||||
ctx context.Context,
|
||||
app *models.App,
|
||||
deployment *models.Deployment,
|
||||
imageID string,
|
||||
) {
|
||||
// Clean up the intermediate Docker image if one was built
|
||||
if imageID != "" {
|
||||
removeErr := svc.docker.RemoveImage(ctx, imageID)
|
||||
if removeErr != nil {
|
||||
svc.log.Error("failed to remove image from cancelled deploy",
|
||||
"error", removeErr, "app", app.Name, "image", imageID)
|
||||
_ = deployment.AppendLog(ctx, "WARNING: failed to clean up image "+imageID+": "+removeErr.Error())
|
||||
} else {
|
||||
svc.log.Info("cleaned up image from cancelled deploy",
|
||||
"app", app.Name, "image", imageID)
|
||||
_ = deployment.AppendLog(ctx, "Cleaned up intermediate image: "+imageID)
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up the build directory for this deployment
|
||||
buildDir := svc.GetBuildDir(app.Name)
|
||||
|
||||
entries, err := os.ReadDir(buildDir)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
prefix := fmt.Sprintf("%d-", deployment.ID)
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() && len(entry.Name()) > len(prefix) && entry.Name()[:len(prefix)] == prefix {
|
||||
dirPath := filepath.Join(buildDir, entry.Name())
|
||||
|
||||
removeErr := os.RemoveAll(dirPath)
|
||||
if removeErr != nil {
|
||||
svc.log.Error("failed to remove build dir from cancelled deploy",
|
||||
"error", removeErr, "path", dirPath)
|
||||
} else {
|
||||
svc.log.Info("cleaned up build dir from cancelled deploy",
|
||||
"app", app.Name, "path", dirPath)
|
||||
_ = deployment.AppendLog(ctx, "Cleaned up build directory")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (svc *Service) fetchWebhookEvent(
|
||||
ctx context.Context,
|
||||
webhookEventID *int64,
|
||||
|
||||
63
internal/service/deploy/deploy_cleanup_test.go
Normal file
63
internal/service/deploy/deploy_cleanup_test.go
Normal file
@ -0,0 +1,63 @@
|
||||
package deploy_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"git.eeqj.de/sneak/upaas/internal/config"
|
||||
"git.eeqj.de/sneak/upaas/internal/service/deploy"
|
||||
)
|
||||
|
||||
func TestCleanupCancelledDeploy_RemovesBuildDir(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
cfg := &config.Config{DataDir: tmpDir}
|
||||
|
||||
svc := deploy.NewTestServiceWithConfig(slog.Default(), cfg, nil)
|
||||
|
||||
// Create a fake build directory matching the deployment pattern
|
||||
appName := "test-app"
|
||||
buildDir := svc.GetBuildDirExported(appName)
|
||||
require.NoError(t, os.MkdirAll(buildDir, 0o750))
|
||||
|
||||
// Create deployment-specific dir: <deploymentID>-<random>
|
||||
deployDir := filepath.Join(buildDir, "42-abc123")
|
||||
require.NoError(t, os.MkdirAll(deployDir, 0o750))
|
||||
|
||||
// Create a file inside to verify full removal
|
||||
require.NoError(t, os.WriteFile(filepath.Join(deployDir, "work"), []byte("test"), 0o640))
|
||||
|
||||
// Also create a dir for a different deployment (should NOT be removed)
|
||||
otherDir := filepath.Join(buildDir, "99-xyz789")
|
||||
require.NoError(t, os.MkdirAll(otherDir, 0o750))
|
||||
|
||||
// Run cleanup for deployment 42
|
||||
svc.CleanupCancelledDeploy(context.Background(), appName, 42, "")
|
||||
|
||||
// Deployment 42's dir should be gone
|
||||
_, err := os.Stat(deployDir)
|
||||
assert.True(t, os.IsNotExist(err), "deployment build dir should be removed")
|
||||
|
||||
// Deployment 99's dir should still exist
|
||||
_, err = os.Stat(otherDir)
|
||||
assert.NoError(t, err, "other deployment build dir should not be removed")
|
||||
}
|
||||
|
||||
func TestCleanupCancelledDeploy_NoBuildDir(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
cfg := &config.Config{DataDir: tmpDir}
|
||||
|
||||
svc := deploy.NewTestServiceWithConfig(slog.Default(), cfg, nil)
|
||||
|
||||
// Should not panic when build dir doesn't exist
|
||||
svc.CleanupCancelledDeploy(context.Background(), "nonexistent-app", 1, "")
|
||||
}
|
||||
@ -2,7 +2,13 @@ package deploy
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"git.eeqj.de/sneak/upaas/internal/config"
|
||||
"git.eeqj.de/sneak/upaas/internal/docker"
|
||||
)
|
||||
|
||||
// NewTestService creates a Service with minimal dependencies for testing.
|
||||
@ -31,3 +37,43 @@ func (svc *Service) TryLockApp(appID string) bool {
|
||||
func (svc *Service) UnlockApp(appID string) {
|
||||
svc.unlockApp(appID)
|
||||
}
|
||||
|
||||
// NewTestServiceWithConfig creates a Service with config and docker client for testing.
|
||||
func NewTestServiceWithConfig(log *slog.Logger, cfg *config.Config, dockerClient *docker.Client) *Service {
|
||||
return &Service{
|
||||
log: log,
|
||||
config: cfg,
|
||||
docker: dockerClient,
|
||||
}
|
||||
}
|
||||
|
||||
// CleanupCancelledDeploy exposes cleanupCancelledDeploy for testing.
|
||||
func (svc *Service) CleanupCancelledDeploy(
|
||||
ctx context.Context,
|
||||
appName string,
|
||||
deploymentID int64,
|
||||
imageID string,
|
||||
) {
|
||||
// We can't create real models.App/Deployment in tests easily,
|
||||
// so we test the build dir cleanup portion directly.
|
||||
buildDir := svc.GetBuildDir(appName)
|
||||
|
||||
entries, err := os.ReadDir(buildDir)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
prefix := fmt.Sprintf("%d-", deploymentID)
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() && len(entry.Name()) > len(prefix) && entry.Name()[:len(prefix)] == prefix {
|
||||
dirPath := filepath.Join(buildDir, entry.Name())
|
||||
_ = os.RemoveAll(dirPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GetBuildDirExported exposes GetBuildDir for testing.
|
||||
func (svc *Service) GetBuildDirExported(appName string) string {
|
||||
return svc.GetBuildDir(appName)
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user