fix: cancel in-progress deploy when webhook triggers new deploy (closes #38)
When a webhook-triggered deploy starts for an app that already has a deploy in progress, the existing deploy is now cancelled via context cancellation before the new deploy begins. This prevents silently lost webhook deploys. Changes: - Add per-app active deploy tracking with cancel func and done channel - Deploy() accepts cancelExisting param: true for webhook, false for manual - Cancelled deployments are marked with new 'cancelled' status - Add ErrDeployCancelled sentinel error - Add DeploymentStatusCancelled model constant - Add comprehensive tests for cancellation mechanics
This commit is contained in:
parent
9a284d40fd
commit
3f499163a7
@ -314,7 +314,7 @@ func (h *Handlers) HandleAppDeploy() http.HandlerFunc {
|
|||||||
deployCtx := context.WithoutCancel(request.Context())
|
deployCtx := context.WithoutCancel(request.Context())
|
||||||
|
|
||||||
go func(ctx context.Context, appToDeploy *models.App) {
|
go func(ctx context.Context, appToDeploy *models.App) {
|
||||||
deployErr := h.deploy.Deploy(ctx, appToDeploy, nil)
|
deployErr := h.deploy.Deploy(ctx, appToDeploy, nil, false)
|
||||||
if deployErr != nil {
|
if deployErr != nil {
|
||||||
h.log.Error(
|
h.log.Error(
|
||||||
"deployment failed",
|
"deployment failed",
|
||||||
|
|||||||
@ -19,6 +19,7 @@ const (
|
|||||||
DeploymentStatusDeploying DeploymentStatus = "deploying"
|
DeploymentStatusDeploying DeploymentStatus = "deploying"
|
||||||
DeploymentStatusSuccess DeploymentStatus = "success"
|
DeploymentStatusSuccess DeploymentStatus = "success"
|
||||||
DeploymentStatusFailed DeploymentStatus = "failed"
|
DeploymentStatusFailed DeploymentStatus = "failed"
|
||||||
|
DeploymentStatusCancelled DeploymentStatus = "cancelled"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Display constants.
|
// Display constants.
|
||||||
|
|||||||
@ -43,6 +43,8 @@ var (
|
|||||||
ErrContainerUnhealthy = errors.New("container unhealthy after 60 seconds")
|
ErrContainerUnhealthy = errors.New("container unhealthy after 60 seconds")
|
||||||
// ErrDeploymentInProgress indicates another deployment is already running.
|
// ErrDeploymentInProgress indicates another deployment is already running.
|
||||||
ErrDeploymentInProgress = errors.New("deployment already in progress for this app")
|
ErrDeploymentInProgress = errors.New("deployment already in progress for this app")
|
||||||
|
// ErrDeployCancelled indicates the deployment was cancelled by a newer deploy.
|
||||||
|
ErrDeployCancelled = errors.New("deployment cancelled by newer deploy")
|
||||||
// ErrBuildTimeout indicates the build phase exceeded the timeout.
|
// ErrBuildTimeout indicates the build phase exceeded the timeout.
|
||||||
ErrBuildTimeout = errors.New("build timeout exceeded")
|
ErrBuildTimeout = errors.New("build timeout exceeded")
|
||||||
// ErrDeployTimeout indicates the deploy phase exceeded the timeout.
|
// ErrDeployTimeout indicates the deploy phase exceeded the timeout.
|
||||||
@ -205,6 +207,12 @@ type ServiceParams struct {
|
|||||||
Notify *notify.Service
|
Notify *notify.Service
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// activeDeploy tracks a running deployment so it can be cancelled.
|
||||||
|
type activeDeploy struct {
|
||||||
|
cancel context.CancelFunc
|
||||||
|
done chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
// Service provides deployment functionality.
|
// Service provides deployment functionality.
|
||||||
type Service struct {
|
type Service struct {
|
||||||
log *slog.Logger
|
log *slog.Logger
|
||||||
@ -213,6 +221,7 @@ type Service struct {
|
|||||||
notify *notify.Service
|
notify *notify.Service
|
||||||
config *config.Config
|
config *config.Config
|
||||||
params *ServiceParams
|
params *ServiceParams
|
||||||
|
activeDeploys sync.Map // map[string]*activeDeploy - per-app active deployment tracking
|
||||||
appLocks sync.Map // map[string]*sync.Mutex - per-app deployment locks
|
appLocks sync.Map // map[string]*sync.Mutex - per-app deployment locks
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,12 +283,19 @@ func (svc *Service) GetLogFilePath(app *models.App, deployment *models.Deploymen
|
|||||||
return filepath.Join(svc.config.DataDir, "logs", hostname, app.Name, filename)
|
return filepath.Join(svc.config.DataDir, "logs", hostname, app.Name, filename)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Deploy deploys an app.
|
// Deploy deploys an app. If cancelExisting is true (e.g. webhook-triggered),
|
||||||
|
// any in-progress deploy for the same app will be cancelled before starting.
|
||||||
|
// If cancelExisting is false and a deploy is in progress, ErrDeploymentInProgress is returned.
|
||||||
func (svc *Service) Deploy(
|
func (svc *Service) Deploy(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
app *models.App,
|
app *models.App,
|
||||||
webhookEventID *int64,
|
webhookEventID *int64,
|
||||||
|
cancelExisting bool,
|
||||||
) error {
|
) error {
|
||||||
|
if cancelExisting {
|
||||||
|
svc.cancelActiveDeploy(app.ID)
|
||||||
|
}
|
||||||
|
|
||||||
// Try to acquire per-app deployment lock
|
// Try to acquire per-app deployment lock
|
||||||
if !svc.tryLockApp(app.ID) {
|
if !svc.tryLockApp(app.ID) {
|
||||||
svc.log.Warn("deployment already in progress", "app", app.Name)
|
svc.log.Warn("deployment already in progress", "app", app.Name)
|
||||||
@ -288,45 +304,80 @@ func (svc *Service) Deploy(
|
|||||||
}
|
}
|
||||||
defer svc.unlockApp(app.ID)
|
defer svc.unlockApp(app.ID)
|
||||||
|
|
||||||
|
// Set up cancellable context and register as active deploy
|
||||||
|
deployCtx, cancel := context.WithCancel(ctx)
|
||||||
|
done := make(chan struct{})
|
||||||
|
ad := &activeDeploy{cancel: cancel, done: done}
|
||||||
|
svc.activeDeploys.Store(app.ID, ad)
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
cancel()
|
||||||
|
close(done)
|
||||||
|
svc.activeDeploys.Delete(app.ID)
|
||||||
|
}()
|
||||||
|
|
||||||
// Fetch webhook event and create deployment record
|
// Fetch webhook event and create deployment record
|
||||||
webhookEvent := svc.fetchWebhookEvent(ctx, webhookEventID)
|
webhookEvent := svc.fetchWebhookEvent(deployCtx, webhookEventID)
|
||||||
|
|
||||||
deployment, err := svc.createDeploymentRecord(ctx, app, webhookEventID, webhookEvent)
|
// Use a background context for DB operations that must complete regardless of cancellation
|
||||||
|
bgCtx := context.WithoutCancel(deployCtx)
|
||||||
|
|
||||||
|
deployment, err := svc.createDeploymentRecord(bgCtx, app, webhookEventID, webhookEvent)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
svc.logWebhookPayload(ctx, deployment, webhookEvent)
|
svc.logWebhookPayload(bgCtx, deployment, webhookEvent)
|
||||||
|
|
||||||
err = svc.updateAppStatusBuilding(ctx, app)
|
err = svc.updateAppStatusBuilding(bgCtx, app)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
svc.notify.NotifyBuildStart(ctx, app, deployment)
|
svc.notify.NotifyBuildStart(bgCtx, app, deployment)
|
||||||
|
|
||||||
|
return svc.runBuildAndDeploy(deployCtx, bgCtx, app, deployment)
|
||||||
|
}
|
||||||
|
|
||||||
|
// runBuildAndDeploy executes the build and deploy phases, handling cancellation.
|
||||||
|
func (svc *Service) runBuildAndDeploy(
|
||||||
|
deployCtx context.Context,
|
||||||
|
bgCtx context.Context,
|
||||||
|
app *models.App,
|
||||||
|
deployment *models.Deployment,
|
||||||
|
) error {
|
||||||
// Build phase with timeout
|
// Build phase with timeout
|
||||||
imageID, err := svc.buildImageWithTimeout(ctx, app, deployment)
|
imageID, err := svc.buildImageWithTimeout(deployCtx, app, deployment)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment)
|
||||||
|
if cancelErr != nil {
|
||||||
|
return cancelErr
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
svc.notify.NotifyBuildSuccess(ctx, app, deployment)
|
svc.notify.NotifyBuildSuccess(bgCtx, app, deployment)
|
||||||
|
|
||||||
// Deploy phase with timeout
|
// Deploy phase with timeout
|
||||||
err = svc.deployContainerWithTimeout(ctx, app, deployment, imageID)
|
err = svc.deployContainerWithTimeout(deployCtx, app, deployment, imageID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment)
|
||||||
|
if cancelErr != nil {
|
||||||
|
return cancelErr
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
err = svc.updateAppRunning(ctx, app, imageID)
|
err = svc.updateAppRunning(bgCtx, app, imageID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use context.WithoutCancel to ensure health check completes even if
|
// Use context.WithoutCancel to ensure health check completes even if
|
||||||
// the parent context is cancelled (e.g., HTTP request ends).
|
// the parent context is cancelled (e.g., HTTP request ends).
|
||||||
go svc.checkHealthAfterDelay(context.WithoutCancel(ctx), app, deployment)
|
go svc.checkHealthAfterDelay(bgCtx, app, deployment)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -463,6 +514,43 @@ func (svc *Service) unlockApp(appID string) {
|
|||||||
svc.getAppLock(appID).Unlock()
|
svc.getAppLock(appID).Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cancelActiveDeploy cancels any in-progress deployment for the given app
|
||||||
|
// and waits for it to finish before returning.
|
||||||
|
func (svc *Service) cancelActiveDeploy(appID string) {
|
||||||
|
val, ok := svc.activeDeploys.Load(appID)
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ad, ok := val.(*activeDeploy)
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
svc.log.Info("cancelling in-progress deployment", "app_id", appID)
|
||||||
|
ad.cancel()
|
||||||
|
<-ad.done
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkCancelled checks if the deploy context was cancelled (by a newer deploy)
|
||||||
|
// and if so, marks the deployment as cancelled. Returns ErrDeployCancelled or nil.
|
||||||
|
func (svc *Service) checkCancelled(
|
||||||
|
deployCtx context.Context,
|
||||||
|
bgCtx context.Context,
|
||||||
|
app *models.App,
|
||||||
|
deployment *models.Deployment,
|
||||||
|
) error {
|
||||||
|
if !errors.Is(deployCtx.Err(), context.Canceled) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
svc.log.Info("deployment cancelled by newer deploy", "app", app.Name)
|
||||||
|
|
||||||
|
_ = deployment.MarkFinished(bgCtx, models.DeploymentStatusCancelled)
|
||||||
|
|
||||||
|
return ErrDeployCancelled
|
||||||
|
}
|
||||||
|
|
||||||
func (svc *Service) fetchWebhookEvent(
|
func (svc *Service) fetchWebhookEvent(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
webhookEventID *int64,
|
webhookEventID *int64,
|
||||||
|
|||||||
133
internal/service/deploy/deploy_cancel_test.go
Normal file
133
internal/service/deploy/deploy_cancel_test.go
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
package deploy_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
|
"git.eeqj.de/sneak/upaas/internal/service/deploy"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCancelActiveDeploy_NoExisting(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
svc := deploy.NewTestService(slog.Default())
|
||||||
|
|
||||||
|
// Should not panic or block when no active deploy exists
|
||||||
|
svc.CancelActiveDeploy("nonexistent-app")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCancelActiveDeploy_CancelsAndWaits(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
svc := deploy.NewTestService(slog.Default())
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
done := make(chan struct{})
|
||||||
|
|
||||||
|
svc.RegisterActiveDeploy("app-1", cancel, done)
|
||||||
|
|
||||||
|
// Simulate a running deploy that respects cancellation
|
||||||
|
var deployFinished bool
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
<-ctx.Done()
|
||||||
|
|
||||||
|
deployFinished = true
|
||||||
|
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
svc.CancelActiveDeploy("app-1")
|
||||||
|
assert.True(t, deployFinished, "deploy should have finished after cancellation")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCancelActiveDeploy_BlocksUntilDone(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
svc := deploy.NewTestService(slog.Default())
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
done := make(chan struct{})
|
||||||
|
|
||||||
|
svc.RegisterActiveDeploy("app-2", cancel, done)
|
||||||
|
|
||||||
|
// Simulate slow cleanup after cancellation
|
||||||
|
go func() {
|
||||||
|
<-ctx.Done()
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
svc.CancelActiveDeploy("app-2")
|
||||||
|
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
|
||||||
|
assert.GreaterOrEqual(t, elapsed, 50*time.Millisecond,
|
||||||
|
"cancelActiveDeploy should block until the deploy finishes")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTryLockApp_PreventsConcurrent(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
svc := deploy.NewTestService(slog.Default())
|
||||||
|
|
||||||
|
assert.True(t, svc.TryLockApp("app-1"), "first lock should succeed")
|
||||||
|
assert.False(t, svc.TryLockApp("app-1"), "second lock should fail")
|
||||||
|
|
||||||
|
svc.UnlockApp("app-1")
|
||||||
|
|
||||||
|
assert.True(t, svc.TryLockApp("app-1"), "lock after unlock should succeed")
|
||||||
|
|
||||||
|
svc.UnlockApp("app-1")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCancelActiveDeploy_AllowsNewDeploy(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
svc := deploy.NewTestService(slog.Default())
|
||||||
|
|
||||||
|
// Simulate an active deploy holding the lock
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
done := make(chan struct{})
|
||||||
|
|
||||||
|
svc.RegisterActiveDeploy("app-3", cancel, done)
|
||||||
|
|
||||||
|
// Lock the app as if a deploy is in progress
|
||||||
|
assert.True(t, svc.TryLockApp("app-3"))
|
||||||
|
|
||||||
|
// Simulate deploy goroutine: release lock on cancellation
|
||||||
|
var mu sync.Mutex
|
||||||
|
|
||||||
|
released := false
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
<-ctx.Done()
|
||||||
|
|
||||||
|
svc.UnlockApp("app-3")
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
released = true
|
||||||
|
mu.Unlock()
|
||||||
|
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Cancel should cause the old deploy to release its lock
|
||||||
|
svc.CancelActiveDeploy("app-3")
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
assert.True(t, released)
|
||||||
|
mu.Unlock()
|
||||||
|
|
||||||
|
// Now a new deploy should be able to acquire the lock
|
||||||
|
assert.True(t, svc.TryLockApp("app-3"), "should be able to lock after cancellation")
|
||||||
|
|
||||||
|
svc.UnlockApp("app-3")
|
||||||
|
}
|
||||||
33
internal/service/deploy/export_test.go
Normal file
33
internal/service/deploy/export_test.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
package deploy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewTestService creates a Service with minimal dependencies for testing.
|
||||||
|
func NewTestService(log *slog.Logger) *Service {
|
||||||
|
return &Service{
|
||||||
|
log: log,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CancelActiveDeploy exposes cancelActiveDeploy for testing.
|
||||||
|
func (svc *Service) CancelActiveDeploy(appID string) {
|
||||||
|
svc.cancelActiveDeploy(appID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegisterActiveDeploy registers an active deploy for testing.
|
||||||
|
func (svc *Service) RegisterActiveDeploy(appID string, cancel context.CancelFunc, done chan struct{}) {
|
||||||
|
svc.activeDeploys.Store(appID, &activeDeploy{cancel: cancel, done: done})
|
||||||
|
}
|
||||||
|
|
||||||
|
// TryLockApp exposes tryLockApp for testing.
|
||||||
|
func (svc *Service) TryLockApp(appID string) bool {
|
||||||
|
return svc.tryLockApp(appID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnlockApp exposes unlockApp for testing.
|
||||||
|
func (svc *Service) UnlockApp(appID string) {
|
||||||
|
svc.unlockApp(appID)
|
||||||
|
}
|
||||||
@ -143,7 +143,7 @@ func (svc *Service) triggerDeployment(
|
|||||||
// even if the HTTP request context is cancelled.
|
// even if the HTTP request context is cancelled.
|
||||||
deployCtx := context.WithoutCancel(ctx)
|
deployCtx := context.WithoutCancel(ctx)
|
||||||
|
|
||||||
deployErr := svc.deploy.Deploy(deployCtx, app, &eventID)
|
deployErr := svc.deploy.Deploy(deployCtx, app, &eventID, true)
|
||||||
if deployErr != nil {
|
if deployErr != nil {
|
||||||
svc.log.Error("deployment failed", "error", deployErr, "app", appName)
|
svc.log.Error("deployment failed", "error", deployErr, "app", appName)
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user