forked from sneak/upaas
Consolidate database schema into two files: init migrations table and complete schema
Since there is no existing installed base, we can consolidate all migrations into a single complete schema file plus the migrations table initialization. This simplifies the database setup for new installations.
This commit is contained in:
180
BUGS.md
Normal file
180
BUGS.md
Normal file
@@ -0,0 +1,180 @@
|
||||
# Bugs in µPaaS
|
||||
|
||||
## 1. Potential Race Condition in Log Writing
|
||||
|
||||
### Description
|
||||
In the deployment service, when a deployment fails, the `failDeployment` function calls `writeLogsToFile` which may be called concurrently with the async log writer's flush operations. This could lead to partial or corrupted log files.
|
||||
|
||||
### Location
|
||||
`internal/service/deploy/deploy.go:1169` in `failDeployment` function
|
||||
|
||||
### Proposed Fix
|
||||
1. Add synchronization to ensure only one log write operation occurs at a time
|
||||
2. Modify the `deploymentLogWriter` to track completion status and prevent concurrent writes
|
||||
3. Add a wait mechanism in `failDeployment` to ensure any ongoing flush operations complete before writing logs to file
|
||||
|
||||
```go
|
||||
// Add a mutex to deploymentLogWriter
|
||||
type deploymentLogWriter struct {
|
||||
// existing fields...
|
||||
mu sync.Mutex
|
||||
writeMu sync.Mutex // Add this for file writing synchronization
|
||||
done chan struct{}
|
||||
flushed sync.WaitGroup
|
||||
}
|
||||
|
||||
// In writeLogsToFile, ensure exclusive access
|
||||
func (svc *Service) writeLogsToFile(app *models.App, deployment *models.Deployment) {
|
||||
svc.writeMu.Lock() // Add this mutex to Service struct
|
||||
defer svc.writeMu.Unlock()
|
||||
// existing code...
|
||||
}
|
||||
```
|
||||
|
||||
## 2. Incomplete Error Handling in Container Operations
|
||||
|
||||
### Description
|
||||
In the Docker client's `performClone` function, if `createGitContainer` fails, the SSH key file created earlier is not cleaned up, causing a potential security risk.
|
||||
|
||||
### Location
|
||||
`internal/docker/client.go:597` in `performClone` function
|
||||
|
||||
### Proposed Fix
|
||||
Add proper cleanup using `defer` immediately after creating the SSH key file:
|
||||
|
||||
```go
|
||||
// After writing SSH key file (line 578)
|
||||
keyFileCreated := false
|
||||
err = os.WriteFile(cfg.keyFile, []byte(cfg.sshPrivateKey), sshKeyPermissions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to write SSH key: %w", err)
|
||||
}
|
||||
keyFileCreated = true
|
||||
|
||||
defer func() {
|
||||
if keyFileCreated {
|
||||
removeErr := os.Remove(cfg.keyFile)
|
||||
if removeErr != nil {
|
||||
c.log.Error("failed to remove SSH key file", "error", removeErr)
|
||||
}
|
||||
}
|
||||
}()
|
||||
```
|
||||
|
||||
## 3. Missing Context Cancellation Check During Build
|
||||
|
||||
### Description
|
||||
In the Docker client's `streamBuildOutput` function, long-running Docker build operations may not properly respond to context cancellation, causing deployments to hang even when cancelled.
|
||||
|
||||
### Location
|
||||
`internal/docker/client.go:542` in `streamBuildOutput` function
|
||||
|
||||
### Proposed Fix
|
||||
Add context checking in the scanner loop:
|
||||
|
||||
```go
|
||||
for scanner.Scan() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
line := scanner.Bytes()
|
||||
// existing code...
|
||||
}
|
||||
```
|
||||
|
||||
## 4. Inconsistent Container Removal in Error Cases
|
||||
|
||||
### Description
|
||||
When deployment fails during container creation, the already-created container is not removed, leading to orphaned containers that consume resources.
|
||||
|
||||
### Location
|
||||
`internal/service/deploy/deploy.go:969` in `createAndStartContainer` function
|
||||
|
||||
### Proposed Fix
|
||||
Add cleanup of created container on start failure:
|
||||
|
||||
```go
|
||||
containerID, err := svc.docker.CreateContainer(ctx, containerOpts)
|
||||
if err != nil {
|
||||
svc.notify.NotifyDeployFailed(ctx, app, deployment, err)
|
||||
svc.failDeployment(ctx, app, deployment, fmt.Errorf("failed to create container: %w", err))
|
||||
return "", fmt.Errorf("failed to create container: %w", err)
|
||||
}
|
||||
|
||||
// Add cleanup defer for error cases
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// If we have a container ID but returning an error, clean it up
|
||||
_ = svc.docker.RemoveContainer(context.Background(), containerID, true)
|
||||
}
|
||||
}()
|
||||
|
||||
startErr := svc.docker.StartContainer(ctx, containerID)
|
||||
if startErr != nil {
|
||||
svc.notify.NotifyDeployFailed(ctx, app, deployment, startErr)
|
||||
svc.failDeployment(ctx, app, deployment, fmt.Errorf("failed to start container: %w", startErr))
|
||||
err = startErr // Set err so defer cleanup runs
|
||||
return "", fmt.Errorf("failed to start container: %w", startErr)
|
||||
}
|
||||
```
|
||||
|
||||
## 5. Potential Data Race in Active Deployments Tracking
|
||||
|
||||
### Description
|
||||
The `activeDeploys` sync.Map in the deployment service is safe for individual loads and stores, but compound check-then-act sequences (e.g. checking whether a deployment exists for an app and then storing a new one) are not atomic, so multiple concurrent deployments for the same app can race on that app's deployment state.
|
||||
|
||||
### Location
|
||||
`internal/service/deploy/deploy.go:226` and related functions
|
||||
|
||||
### Proposed Fix
|
||||
Add proper locking around active deploy operations:
|
||||
|
||||
```go
|
||||
// Add a mutex for active deploy operations
|
||||
type Service struct {
|
||||
// existing fields...
|
||||
activeDeployMu sync.Mutex
|
||||
}
|
||||
|
||||
// In Deploy function
|
||||
func (svc *Service) Deploy(ctx context.Context, app *models.App, webhookEventID *int64, cancelExisting bool) error {
|
||||
svc.activeDeployMu.Lock()
|
||||
if cancelExisting {
|
||||
svc.cancelActiveDeploy(app.ID)
|
||||
}
|
||||
|
||||
// Try to acquire per-app deployment lock
|
||||
if !svc.tryLockApp(app.ID) {
|
||||
svc.activeDeployMu.Unlock()
|
||||
svc.log.Warn("deployment already in progress", "app", app.Name)
|
||||
return ErrDeploymentInProgress
|
||||
}
|
||||
svc.activeDeployMu.Unlock()
|
||||
|
||||
defer svc.unlockApp(app.ID)
|
||||
// rest of function...
|
||||
}
|
||||
```
|
||||
|
||||
## 6. Incomplete Error Propagation in Git Clone
|
||||
|
||||
### Description
|
||||
In the Docker client's `runGitClone` function, if `ContainerLogs` fails, the error is silently ignored, which could hide important debugging information.
|
||||
|
||||
### Location
|
||||
`internal/docker/client.go:679` in `runGitClone` function
|
||||
|
||||
### Proposed Fix
|
||||
Handle the ContainerLogs error properly:
|
||||
|
||||
```go
|
||||
// Always capture logs for the result
|
||||
logs, logErr := c.ContainerLogs(ctx, containerID, "100")
|
||||
if logErr != nil {
|
||||
c.log.Warn("failed to get git clone logs", "error", logErr)
|
||||
logs = "Failed to retrieve logs: " + logErr.Error()
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user