Merge pull request 'fix: clean up orphan resources on deploy cancellation (closes #89)' (#93) from fix/deploy-cancel-cleanup into main
Reviewed-on: #93
This commit is contained in:
commit
06e8e66443
@ -17,6 +17,7 @@ import (
|
|||||||
"github.com/docker/docker/api/types"
|
"github.com/docker/docker/api/types"
|
||||||
"github.com/docker/docker/api/types/container"
|
"github.com/docker/docker/api/types/container"
|
||||||
"github.com/docker/docker/api/types/filters"
|
"github.com/docker/docker/api/types/filters"
|
||||||
|
"github.com/docker/docker/api/types/image"
|
||||||
"github.com/docker/docker/api/types/mount"
|
"github.com/docker/docker/api/types/mount"
|
||||||
"github.com/docker/docker/api/types/network"
|
"github.com/docker/docker/api/types/network"
|
||||||
"github.com/docker/docker/client"
|
"github.com/docker/docker/client"
|
||||||
@ -739,6 +740,20 @@ func (c *Client) connect(ctx context.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RemoveImage removes a Docker image by ID or tag.
|
||||||
|
// It returns nil if the image was successfully removed or does not exist.
|
||||||
|
func (c *Client) RemoveImage(ctx context.Context, imageID string) error {
|
||||||
|
_, err := c.docker.ImageRemove(ctx, imageID, image.RemoveOptions{
|
||||||
|
Force: true,
|
||||||
|
PruneChildren: true,
|
||||||
|
})
|
||||||
|
if err != nil && !client.IsErrNotFound(err) {
|
||||||
|
return fmt.Errorf("failed to remove image %s: %w", imageID, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Client) close() error {
|
func (c *Client) close() error {
|
||||||
if c.docker != nil {
|
if c.docker != nil {
|
||||||
err := c.docker.Close()
|
err := c.docker.Close()
|
||||||
|
|||||||
@ -11,6 +11,7 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -472,7 +473,7 @@ func (svc *Service) runBuildAndDeploy(
|
|||||||
// Build phase with timeout
|
// Build phase with timeout
|
||||||
imageID, err := svc.buildImageWithTimeout(deployCtx, app, deployment)
|
imageID, err := svc.buildImageWithTimeout(deployCtx, app, deployment)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment)
|
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, "")
|
||||||
if cancelErr != nil {
|
if cancelErr != nil {
|
||||||
return cancelErr
|
return cancelErr
|
||||||
}
|
}
|
||||||
@ -485,7 +486,7 @@ func (svc *Service) runBuildAndDeploy(
|
|||||||
// Deploy phase with timeout
|
// Deploy phase with timeout
|
||||||
err = svc.deployContainerWithTimeout(deployCtx, app, deployment, imageID)
|
err = svc.deployContainerWithTimeout(deployCtx, app, deployment, imageID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment)
|
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, imageID)
|
||||||
if cancelErr != nil {
|
if cancelErr != nil {
|
||||||
return cancelErr
|
return cancelErr
|
||||||
}
|
}
|
||||||
@ -661,24 +662,76 @@ func (svc *Service) cancelActiveDeploy(appID string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// checkCancelled checks if the deploy context was cancelled (by a newer deploy)
|
// checkCancelled checks if the deploy context was cancelled (by a newer deploy)
|
||||||
// and if so, marks the deployment as cancelled. Returns ErrDeployCancelled or nil.
|
// and if so, marks the deployment as cancelled and cleans up orphan resources.
|
||||||
|
// Returns ErrDeployCancelled or nil.
|
||||||
func (svc *Service) checkCancelled(
|
func (svc *Service) checkCancelled(
|
||||||
deployCtx context.Context,
|
deployCtx context.Context,
|
||||||
bgCtx context.Context,
|
bgCtx context.Context,
|
||||||
app *models.App,
|
app *models.App,
|
||||||
deployment *models.Deployment,
|
deployment *models.Deployment,
|
||||||
|
imageID string,
|
||||||
) error {
|
) error {
|
||||||
if !errors.Is(deployCtx.Err(), context.Canceled) {
|
if !errors.Is(deployCtx.Err(), context.Canceled) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
svc.log.Info("deployment cancelled by newer deploy", "app", app.Name)
|
svc.log.Info("deployment cancelled", "app", app.Name)
|
||||||
|
|
||||||
|
svc.cleanupCancelledDeploy(bgCtx, app, deployment, imageID)
|
||||||
|
|
||||||
_ = deployment.MarkFinished(bgCtx, models.DeploymentStatusCancelled)
|
_ = deployment.MarkFinished(bgCtx, models.DeploymentStatusCancelled)
|
||||||
|
|
||||||
return ErrDeployCancelled
|
return ErrDeployCancelled
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cleanupCancelledDeploy removes orphan resources left by a cancelled deployment.
|
||||||
|
func (svc *Service) cleanupCancelledDeploy(
|
||||||
|
ctx context.Context,
|
||||||
|
app *models.App,
|
||||||
|
deployment *models.Deployment,
|
||||||
|
imageID string,
|
||||||
|
) {
|
||||||
|
// Clean up the intermediate Docker image if one was built
|
||||||
|
if imageID != "" {
|
||||||
|
removeErr := svc.docker.RemoveImage(ctx, imageID)
|
||||||
|
if removeErr != nil {
|
||||||
|
svc.log.Error("failed to remove image from cancelled deploy",
|
||||||
|
"error", removeErr, "app", app.Name, "image", imageID)
|
||||||
|
_ = deployment.AppendLog(ctx, "WARNING: failed to clean up image "+imageID+": "+removeErr.Error())
|
||||||
|
} else {
|
||||||
|
svc.log.Info("cleaned up image from cancelled deploy",
|
||||||
|
"app", app.Name, "image", imageID)
|
||||||
|
_ = deployment.AppendLog(ctx, "Cleaned up intermediate image: "+imageID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up the build directory for this deployment
|
||||||
|
buildDir := svc.GetBuildDir(app.Name)
|
||||||
|
|
||||||
|
entries, err := os.ReadDir(buildDir)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
prefix := fmt.Sprintf("%d-", deployment.ID)
|
||||||
|
|
||||||
|
for _, entry := range entries {
|
||||||
|
if entry.IsDir() && strings.HasPrefix(entry.Name(), prefix) {
|
||||||
|
dirPath := filepath.Join(buildDir, entry.Name())
|
||||||
|
|
||||||
|
removeErr := os.RemoveAll(dirPath)
|
||||||
|
if removeErr != nil {
|
||||||
|
svc.log.Error("failed to remove build dir from cancelled deploy",
|
||||||
|
"error", removeErr, "path", dirPath)
|
||||||
|
} else {
|
||||||
|
svc.log.Info("cleaned up build dir from cancelled deploy",
|
||||||
|
"app", app.Name, "path", dirPath)
|
||||||
|
_ = deployment.AppendLog(ctx, "Cleaned up build directory")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (svc *Service) fetchWebhookEvent(
|
func (svc *Service) fetchWebhookEvent(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
webhookEventID *int64,
|
webhookEventID *int64,
|
||||||
|
|||||||
63
internal/service/deploy/deploy_cleanup_test.go
Normal file
63
internal/service/deploy/deploy_cleanup_test.go
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
package deploy_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"git.eeqj.de/sneak/upaas/internal/config"
|
||||||
|
"git.eeqj.de/sneak/upaas/internal/service/deploy"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCleanupCancelledDeploy_RemovesBuildDir(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
cfg := &config.Config{DataDir: tmpDir}
|
||||||
|
|
||||||
|
svc := deploy.NewTestServiceWithConfig(slog.Default(), cfg, nil)
|
||||||
|
|
||||||
|
// Create a fake build directory matching the deployment pattern
|
||||||
|
appName := "test-app"
|
||||||
|
buildDir := svc.GetBuildDirExported(appName)
|
||||||
|
require.NoError(t, os.MkdirAll(buildDir, 0o750))
|
||||||
|
|
||||||
|
// Create deployment-specific dir: <deploymentID>-<random>
|
||||||
|
deployDir := filepath.Join(buildDir, "42-abc123")
|
||||||
|
require.NoError(t, os.MkdirAll(deployDir, 0o750))
|
||||||
|
|
||||||
|
// Create a file inside to verify full removal
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(deployDir, "work"), []byte("test"), 0o640))
|
||||||
|
|
||||||
|
// Also create a dir for a different deployment (should NOT be removed)
|
||||||
|
otherDir := filepath.Join(buildDir, "99-xyz789")
|
||||||
|
require.NoError(t, os.MkdirAll(otherDir, 0o750))
|
||||||
|
|
||||||
|
// Run cleanup for deployment 42
|
||||||
|
svc.CleanupCancelledDeploy(context.Background(), appName, 42, "")
|
||||||
|
|
||||||
|
// Deployment 42's dir should be gone
|
||||||
|
_, err := os.Stat(deployDir)
|
||||||
|
assert.True(t, os.IsNotExist(err), "deployment build dir should be removed")
|
||||||
|
|
||||||
|
// Deployment 99's dir should still exist
|
||||||
|
_, err = os.Stat(otherDir)
|
||||||
|
assert.NoError(t, err, "other deployment build dir should not be removed")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCleanupCancelledDeploy_NoBuildDir(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
cfg := &config.Config{DataDir: tmpDir}
|
||||||
|
|
||||||
|
svc := deploy.NewTestServiceWithConfig(slog.Default(), cfg, nil)
|
||||||
|
|
||||||
|
// Should not panic when build dir doesn't exist
|
||||||
|
svc.CleanupCancelledDeploy(context.Background(), "nonexistent-app", 1, "")
|
||||||
|
}
|
||||||
@ -2,7 +2,14 @@ package deploy
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"git.eeqj.de/sneak/upaas/internal/config"
|
||||||
|
"git.eeqj.de/sneak/upaas/internal/docker"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewTestService creates a Service with minimal dependencies for testing.
|
// NewTestService creates a Service with minimal dependencies for testing.
|
||||||
@ -31,3 +38,45 @@ func (svc *Service) TryLockApp(appID string) bool {
|
|||||||
func (svc *Service) UnlockApp(appID string) {
|
func (svc *Service) UnlockApp(appID string) {
|
||||||
svc.unlockApp(appID)
|
svc.unlockApp(appID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewTestServiceWithConfig creates a Service with config and docker client for testing.
|
||||||
|
func NewTestServiceWithConfig(log *slog.Logger, cfg *config.Config, dockerClient *docker.Client) *Service {
|
||||||
|
return &Service{
|
||||||
|
log: log,
|
||||||
|
config: cfg,
|
||||||
|
docker: dockerClient,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CleanupCancelledDeploy exposes the build directory cleanup portion of
|
||||||
|
// cleanupCancelledDeploy for testing. It removes build directories matching
|
||||||
|
// the deployment ID prefix.
|
||||||
|
func (svc *Service) CleanupCancelledDeploy(
|
||||||
|
ctx context.Context,
|
||||||
|
appName string,
|
||||||
|
deploymentID int64,
|
||||||
|
imageID string,
|
||||||
|
) {
|
||||||
|
// We can't create real models.App/Deployment in tests easily,
|
||||||
|
// so we test the build dir cleanup portion directly.
|
||||||
|
buildDir := svc.GetBuildDir(appName)
|
||||||
|
|
||||||
|
entries, err := os.ReadDir(buildDir)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
prefix := fmt.Sprintf("%d-", deploymentID)
|
||||||
|
|
||||||
|
for _, entry := range entries {
|
||||||
|
if entry.IsDir() && strings.HasPrefix(entry.Name(), prefix) {
|
||||||
|
dirPath := filepath.Join(buildDir, entry.Name())
|
||||||
|
_ = os.RemoveAll(dirPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetBuildDirExported exposes GetBuildDir for testing.
|
||||||
|
func (svc *Service) GetBuildDirExported(appName string) string {
|
||||||
|
return svc.GetBuildDir(appName)
|
||||||
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user