Compare commits
1 Commits
main
...
schema-con
| Author | SHA1 | Date | |
|---|---|---|---|
| 4e6542badf |
180
BUGS.md
Normal file
180
BUGS.md
Normal file
@ -0,0 +1,180 @@
|
||||
# Bugs in µPaaS
|
||||
|
||||
## 1. Potential Race Condition in Log Writing
|
||||
|
||||
### Description
|
||||
In the deployment service, when a deployment fails, the `failDeployment` function calls `writeLogsToFile` which may be called concurrently with the async log writer's flush operations. This could lead to partial or corrupted log files.
|
||||
|
||||
### Location
|
||||
`internal/service/deploy/deploy.go:1169` in `failDeployment` function
|
||||
|
||||
### Proposed Fix
|
||||
1. Add synchronization to ensure only one log write operation occurs at a time
|
||||
2. Modify the `deploymentLogWriter` to track completion status and prevent concurrent writes
|
||||
3. Add a wait mechanism in `failDeployment` to ensure any ongoing flush operations complete before writing logs to file
|
||||
|
||||
```go
|
||||
// Add a mutex to deploymentLogWriter
|
||||
type deploymentLogWriter struct {
|
||||
// existing fields...
|
||||
mu sync.Mutex
|
||||
writeMu sync.Mutex // Add this for file writing synchronization
|
||||
done chan struct{}
|
||||
flushed sync.WaitGroup
|
||||
}
|
||||
|
||||
// In writeLogsToFile, ensure exclusive access
|
||||
func (svc *Service) writeLogsToFile(app *models.App, deployment *models.Deployment) {
|
||||
svc.writeMu.Lock() // Add this mutex to Service struct
|
||||
defer svc.writeMu.Unlock()
|
||||
// existing code...
|
||||
}
|
||||
```
|
||||
|
||||
## 2. Incomplete Error Handling in Container Operations
|
||||
|
||||
### Description
|
||||
In the Docker client's `performClone` function, if `createGitContainer` fails, the SSH key file created earlier is not cleaned up, causing a potential security risk.
|
||||
|
||||
### Location
|
||||
`internal/docker/client.go:597` in `performClone` function
|
||||
|
||||
### Proposed Fix
|
||||
Add proper cleanup using `defer` immediately after creating the SSH key file:
|
||||
|
||||
```go
|
||||
// After writing SSH key file (line 578)
|
||||
keyFileCreated := false
|
||||
err = os.WriteFile(cfg.keyFile, []byte(cfg.sshPrivateKey), sshKeyPermissions)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to write SSH key: %w", err)
|
||||
}
|
||||
keyFileCreated = true
|
||||
|
||||
defer func() {
|
||||
if keyFileCreated {
|
||||
removeErr := os.Remove(cfg.keyFile)
|
||||
if removeErr != nil {
|
||||
c.log.Error("failed to remove SSH key file", "error", removeErr)
|
||||
}
|
||||
}
|
||||
}()
|
||||
```
|
||||
|
||||
## 3. Missing Context Cancellation Check During Build
|
||||
|
||||
### Description
|
||||
In the deployment service's `streamBuildOutput` function, long-running Docker build operations may not properly respond to context cancellation, causing deployments to hang even when cancelled.
|
||||
|
||||
### Location
|
||||
`internal/docker/client.go:542` in `streamBuildOutput` function
|
||||
|
||||
### Proposed Fix
|
||||
Add context checking in the scanner loop:
|
||||
|
||||
```go
|
||||
for scanner.Scan() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
line := scanner.Bytes()
|
||||
// existing code...
|
||||
}
|
||||
```
|
||||
|
||||
## 4. Inconsistent Container Removal in Error Cases
|
||||
|
||||
### Description
|
||||
When deployment fails during container creation, the already-created container is not removed, leading to orphaned containers that consume resources.
|
||||
|
||||
### Location
|
||||
`internal/service/deploy/deploy.go:969` in `createAndStartContainer` function
|
||||
|
||||
### Proposed Fix
|
||||
Add cleanup of created container on start failure:
|
||||
|
||||
```go
|
||||
containerID, err := svc.docker.CreateContainer(ctx, containerOpts)
|
||||
if err != nil {
|
||||
svc.notify.NotifyDeployFailed(ctx, app, deployment, err)
|
||||
svc.failDeployment(ctx, app, deployment, fmt.Errorf("failed to create container: %w", err))
|
||||
return "", fmt.Errorf("failed to create container: %w", err)
|
||||
}
|
||||
|
||||
// Add cleanup defer for error cases
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// If we have a container ID but returning an error, clean it up
|
||||
_ = svc.docker.RemoveContainer(context.Background(), containerID, true)
|
||||
}
|
||||
}()
|
||||
|
||||
startErr := svc.docker.StartContainer(ctx, containerID)
|
||||
if startErr != nil {
|
||||
svc.notify.NotifyDeployFailed(ctx, app, deployment, startErr)
|
||||
svc.failDeployment(ctx, app, deployment, fmt.Errorf("failed to start container: %w", startErr))
|
||||
err = startErr // Set err so defer cleanup runs
|
||||
return "", fmt.Errorf("failed to start container: %w", startErr)
|
||||
}
|
||||
```
|
||||
|
||||
## 5. Potential Data Race in Active Deployments Tracking
|
||||
|
||||
### Description
|
||||
The `activeDeploys` sync.Map in the deployment service may have race conditions when multiple concurrent deployments try to access the same app's deployment state.
|
||||
|
||||
### Location
|
||||
`internal/service/deploy/deploy.go:226` and related functions
|
||||
|
||||
### Proposed Fix
|
||||
Add proper locking around active deploy operations:
|
||||
|
||||
```go
|
||||
// Add a mutex for active deploy operations
|
||||
type Service struct {
|
||||
// existing fields...
|
||||
activeDeployMu sync.Mutex
|
||||
}
|
||||
|
||||
// In Deploy function
|
||||
func (svc *Service) Deploy(ctx context.Context, app *models.App, webhookEventID *int64, cancelExisting bool) error {
|
||||
svc.activeDeployMu.Lock()
|
||||
if cancelExisting {
|
||||
svc.cancelActiveDeploy(app.ID)
|
||||
}
|
||||
|
||||
// Try to acquire per-app deployment lock
|
||||
if !svc.tryLockApp(app.ID) {
|
||||
svc.activeDeployMu.Unlock()
|
||||
svc.log.Warn("deployment already in progress", "app", app.Name)
|
||||
return ErrDeploymentInProgress
|
||||
}
|
||||
svc.activeDeployMu.Unlock()
|
||||
|
||||
defer svc.unlockApp(app.ID)
|
||||
// rest of function...
|
||||
}
|
||||
```
|
||||
|
||||
## 6. Incomplete Error Propagation in Git Clone
|
||||
|
||||
### Description
|
||||
In the Docker client's `runGitClone` function, if `ContainerLogs` fails, the error is silently ignored, which could hide important debugging information.
|
||||
|
||||
### Location
|
||||
`internal/docker/client.go:679` in `runGitClone` function
|
||||
|
||||
### Proposed Fix
|
||||
Handle the ContainerLogs error properly:
|
||||
|
||||
```go
|
||||
// Always capture logs for the result
|
||||
logs, logErr := c.ContainerLogs(ctx, containerID, "100")
|
||||
if logErr != nil {
|
||||
c.log.Warn("failed to get git clone logs", "error", logErr)
|
||||
logs = "Failed to retrieve logs: " + logErr.Error()
|
||||
}
|
||||
```
|
||||
68
CLAUDE.md
Normal file
68
CLAUDE.md
Normal file
@ -0,0 +1,68 @@
|
||||
# Repository Rules
|
||||
|
||||
Last Updated 2026-01-08
|
||||
|
||||
These rules MUST be followed at all times, it is very important.
|
||||
|
||||
* Never use `git add -A` - add specific changes to a deliberate commit. A
|
||||
commit should contain one change. After each change, make a commit with a
|
||||
good one-line summary.
|
||||
|
||||
* NEVER modify the linter config without asking first.
|
||||
|
||||
* NEVER modify tests to exclude special cases or otherwise get them to pass
|
||||
without asking first. In almost all cases, the code should be changed,
|
||||
NOT the tests. If you think the test needs to be changed, make your case
|
||||
for that and ask for permission to proceed, then stop. You need explicit
|
||||
user approval to modify existing tests. (You do not need user approval
|
||||
for writing NEW tests.)
|
||||
|
||||
* When linting, assume the linter config is CORRECT, and that each item
|
||||
output by the linter is something that legitimately needs fixing in the
|
||||
code.
|
||||
|
||||
* When running tests, use `make test`.
|
||||
|
||||
* Before commits, run `make check`. This runs `make lint` and `make test`
|
||||
and `make check-fmt`. Any issues discovered MUST be resolved before
|
||||
committing unless explicitly told otherwise.
|
||||
|
||||
* When fixing a bug, write a failing test for the bug FIRST. Add
|
||||
appropriate logging to the test to ensure it is written correctly. Commit
|
||||
that. Then go about fixing the bug until the test passes (without
|
||||
modifying the test further). Then commit that.
|
||||
|
||||
* When adding a new feature, do the same - implement a test first (TDD). It
|
||||
doesn't have to be super complex. Commit the test, then commit the
|
||||
feature.
|
||||
|
||||
* When adding a new feature, use a feature branch. When the feature is
|
||||
completely finished and the code is up to standards (passes `make check`)
|
||||
then and only then can the feature branch be merged into `main` and the
|
||||
branch deleted.
|
||||
|
||||
* Write godoc documentation comments for all exported types and functions as
|
||||
you go along.
|
||||
|
||||
* ALWAYS be consistent in naming. If you name something one thing in one
|
||||
place, name it the EXACT SAME THING in another place.
|
||||
|
||||
* Be descriptive and specific in naming. `wl` is bad;
|
||||
`SourceHostWhitelist` is good. `ConnsPerHost` is bad;
|
||||
`MaxConnectionsPerHost` is good.
|
||||
|
||||
* This is not prototype or teaching code - this is designed for production.
|
||||
Any security issues (such as denial of service) or other web
|
||||
vulnerabilities are P1 bugs and must be added to TODO.md at the top.
|
||||
|
||||
* As this is production code, no stubbing of implementations unless
|
||||
specifically instructed. We need working implementations.
|
||||
|
||||
* Avoid vendoring deps unless specifically instructed to. NEVER commit
|
||||
the vendor directory, NEVER commit compiled binaries. If these
|
||||
directories or files exist, add them to .gitignore (and commit the
|
||||
.gitignore) if they are not already in there. Keep the entire git
|
||||
repository (with history) small - under 20MiB, unless you specifically
|
||||
must commit larger files (e.g. test fixture example media files). Only
|
||||
OUR source code and immediately supporting files (such as test examples)
|
||||
goes into the repo/history.
|
||||
6
internal/database/migrations/001_init_migrations.sql
Normal file
6
internal/database/migrations/001_init_migrations.sql
Normal file
@ -0,0 +1,6 @@
|
||||
-- Initialize migrations table for tracking applied migrations
|
||||
CREATE TABLE IF NOT EXISTS migrations (
|
||||
id INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL UNIQUE,
|
||||
applied_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
@ -1,7 +1,8 @@
|
||||
-- Initial schema for upaas
|
||||
-- Complete schema for upaas (consolidated)
|
||||
-- This represents the final state of all migrations applied
|
||||
|
||||
-- Users table (single admin user)
|
||||
CREATE TABLE users (
|
||||
CREATE TABLE IF NOT EXISTS users (
|
||||
id INTEGER PRIMARY KEY,
|
||||
username TEXT UNIQUE NOT NULL,
|
||||
password_hash TEXT NOT NULL,
|
||||
@ -9,7 +10,7 @@ CREATE TABLE users (
|
||||
);
|
||||
|
||||
-- Apps table
|
||||
CREATE TABLE apps (
|
||||
CREATE TABLE IF NOT EXISTS apps (
|
||||
id TEXT PRIMARY KEY,
|
||||
name TEXT UNIQUE NOT NULL,
|
||||
repo_url TEXT NOT NULL,
|
||||
@ -18,18 +19,19 @@ CREATE TABLE apps (
|
||||
webhook_secret TEXT NOT NULL,
|
||||
ssh_private_key TEXT NOT NULL,
|
||||
ssh_public_key TEXT NOT NULL,
|
||||
container_id TEXT,
|
||||
image_id TEXT,
|
||||
previous_image_id TEXT,
|
||||
status TEXT DEFAULT 'pending',
|
||||
docker_network TEXT,
|
||||
ntfy_topic TEXT,
|
||||
slack_webhook TEXT,
|
||||
webhook_secret_hash TEXT NOT NULL DEFAULT '',
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- App environment variables
|
||||
CREATE TABLE app_env_vars (
|
||||
CREATE TABLE IF NOT EXISTS app_env_vars (
|
||||
id INTEGER PRIMARY KEY,
|
||||
app_id TEXT NOT NULL REFERENCES apps(id) ON DELETE CASCADE,
|
||||
key TEXT NOT NULL,
|
||||
@ -38,7 +40,7 @@ CREATE TABLE app_env_vars (
|
||||
);
|
||||
|
||||
-- App labels
|
||||
CREATE TABLE app_labels (
|
||||
CREATE TABLE IF NOT EXISTS app_labels (
|
||||
id INTEGER PRIMARY KEY,
|
||||
app_id TEXT NOT NULL REFERENCES apps(id) ON DELETE CASCADE,
|
||||
key TEXT NOT NULL,
|
||||
@ -47,7 +49,7 @@ CREATE TABLE app_labels (
|
||||
);
|
||||
|
||||
-- App volume mounts
|
||||
CREATE TABLE app_volumes (
|
||||
CREATE TABLE IF NOT EXISTS app_volumes (
|
||||
id INTEGER PRIMARY KEY,
|
||||
app_id TEXT NOT NULL REFERENCES apps(id) ON DELETE CASCADE,
|
||||
host_path TEXT NOT NULL,
|
||||
@ -55,13 +57,24 @@ CREATE TABLE app_volumes (
|
||||
readonly INTEGER DEFAULT 0
|
||||
);
|
||||
|
||||
-- App port mappings
|
||||
CREATE TABLE IF NOT EXISTS app_ports (
|
||||
id INTEGER PRIMARY KEY,
|
||||
app_id TEXT NOT NULL REFERENCES apps(id) ON DELETE CASCADE,
|
||||
host_port INTEGER NOT NULL,
|
||||
container_port INTEGER NOT NULL,
|
||||
protocol TEXT NOT NULL DEFAULT 'tcp' CHECK(protocol IN ('tcp', 'udp')),
|
||||
UNIQUE(host_port, protocol)
|
||||
);
|
||||
|
||||
-- Webhook events log
|
||||
CREATE TABLE webhook_events (
|
||||
CREATE TABLE IF NOT EXISTS webhook_events (
|
||||
id INTEGER PRIMARY KEY,
|
||||
app_id TEXT NOT NULL REFERENCES apps(id) ON DELETE CASCADE,
|
||||
event_type TEXT NOT NULL,
|
||||
branch TEXT NOT NULL,
|
||||
commit_sha TEXT,
|
||||
commit_url TEXT,
|
||||
payload TEXT,
|
||||
matched INTEGER NOT NULL,
|
||||
processed INTEGER DEFAULT 0,
|
||||
@ -69,13 +82,13 @@ CREATE TABLE webhook_events (
|
||||
);
|
||||
|
||||
-- Deployments log
|
||||
CREATE TABLE deployments (
|
||||
CREATE TABLE IF NOT EXISTS deployments (
|
||||
id INTEGER PRIMARY KEY,
|
||||
app_id TEXT NOT NULL REFERENCES apps(id) ON DELETE CASCADE,
|
||||
webhook_event_id INTEGER REFERENCES webhook_events(id),
|
||||
commit_sha TEXT,
|
||||
commit_url TEXT,
|
||||
image_id TEXT,
|
||||
container_id TEXT,
|
||||
status TEXT NOT NULL,
|
||||
logs TEXT,
|
||||
started_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
@ -83,12 +96,14 @@ CREATE TABLE deployments (
|
||||
);
|
||||
|
||||
-- Indexes
|
||||
CREATE INDEX idx_apps_status ON apps(status);
|
||||
CREATE INDEX idx_apps_webhook_secret ON apps(webhook_secret);
|
||||
CREATE INDEX idx_app_env_vars_app_id ON app_env_vars(app_id);
|
||||
CREATE INDEX idx_app_labels_app_id ON app_labels(app_id);
|
||||
CREATE INDEX idx_app_volumes_app_id ON app_volumes(app_id);
|
||||
CREATE INDEX idx_webhook_events_app_id ON webhook_events(app_id);
|
||||
CREATE INDEX idx_webhook_events_created_at ON webhook_events(created_at);
|
||||
CREATE INDEX idx_deployments_app_id ON deployments(app_id);
|
||||
CREATE INDEX idx_deployments_started_at ON deployments(started_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_apps_status ON apps(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_apps_webhook_secret ON apps(webhook_secret);
|
||||
CREATE INDEX IF NOT EXISTS idx_apps_webhook_secret_hash ON apps(webhook_secret_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_app_env_vars_app_id ON app_env_vars(app_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_app_labels_app_id ON app_labels(app_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_app_volumes_app_id ON app_volumes(app_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_app_ports_app_id ON app_ports(app_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_webhook_events_app_id ON webhook_events(app_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_webhook_events_created_at ON webhook_events(created_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_deployments_app_id ON deployments(app_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_deployments_started_at ON deployments(started_at);
|
||||
@ -1,44 +0,0 @@
|
||||
-- Remove container_id from apps table
|
||||
-- Container is now looked up via Docker label (upaas.id) instead of stored in database
|
||||
|
||||
-- SQLite doesn't support DROP COLUMN before version 3.35.0 (2021-03-12)
|
||||
-- Use table rebuild for broader compatibility
|
||||
|
||||
-- Create new table without container_id
|
||||
CREATE TABLE apps_new (
|
||||
id TEXT PRIMARY KEY,
|
||||
name TEXT UNIQUE NOT NULL,
|
||||
repo_url TEXT NOT NULL,
|
||||
branch TEXT NOT NULL DEFAULT 'main',
|
||||
dockerfile_path TEXT DEFAULT 'Dockerfile',
|
||||
webhook_secret TEXT NOT NULL,
|
||||
ssh_private_key TEXT NOT NULL,
|
||||
ssh_public_key TEXT NOT NULL,
|
||||
image_id TEXT,
|
||||
status TEXT DEFAULT 'pending',
|
||||
docker_network TEXT,
|
||||
ntfy_topic TEXT,
|
||||
slack_webhook TEXT,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Copy data (excluding container_id)
|
||||
INSERT INTO apps_new (
|
||||
id, name, repo_url, branch, dockerfile_path, webhook_secret,
|
||||
ssh_private_key, ssh_public_key, image_id, status,
|
||||
docker_network, ntfy_topic, slack_webhook, created_at, updated_at
|
||||
)
|
||||
SELECT
|
||||
id, name, repo_url, branch, dockerfile_path, webhook_secret,
|
||||
ssh_private_key, ssh_public_key, image_id, status,
|
||||
docker_network, ntfy_topic, slack_webhook, created_at, updated_at
|
||||
FROM apps;
|
||||
|
||||
-- Drop old table and rename new one
|
||||
DROP TABLE apps;
|
||||
ALTER TABLE apps_new RENAME TO apps;
|
||||
|
||||
-- Recreate indexes
|
||||
CREATE INDEX idx_apps_status ON apps(status);
|
||||
CREATE INDEX idx_apps_webhook_secret ON apps(webhook_secret);
|
||||
@ -1,12 +0,0 @@
|
||||
-- Add port mappings for apps
|
||||
|
||||
CREATE TABLE app_ports (
|
||||
id INTEGER PRIMARY KEY,
|
||||
app_id TEXT NOT NULL REFERENCES apps(id) ON DELETE CASCADE,
|
||||
host_port INTEGER NOT NULL,
|
||||
container_port INTEGER NOT NULL,
|
||||
protocol TEXT NOT NULL DEFAULT 'tcp' CHECK(protocol IN ('tcp', 'udp')),
|
||||
UNIQUE(host_port, protocol)
|
||||
);
|
||||
|
||||
CREATE INDEX idx_app_ports_app_id ON app_ports(app_id);
|
||||
@ -1,3 +0,0 @@
|
||||
-- Add commit_url column to webhook_events and deployments tables
|
||||
ALTER TABLE webhook_events ADD COLUMN commit_url TEXT;
|
||||
ALTER TABLE deployments ADD COLUMN commit_url TEXT;
|
||||
@ -1,2 +0,0 @@
|
||||
-- Add webhook_secret_hash column for constant-time secret lookup
|
||||
ALTER TABLE apps ADD COLUMN webhook_secret_hash TEXT NOT NULL DEFAULT '';
|
||||
@ -1,2 +0,0 @@
|
||||
-- Add previous_image_id to apps for deployment rollback support
|
||||
ALTER TABLE apps ADD COLUMN previous_image_id TEXT;
|
||||
Loading…
Reference in New Issue
Block a user