From 0b39b39f3b0f3f84fe48ce237032b02868a3f17a Mon Sep 17 00:00:00 2001 From: Xen Date: Mon, 9 Mar 2026 19:43:30 +0000 Subject: [PATCH] =?UTF-8?q?fix(batch1):=20safe=20fixes=20=E2=80=94=20saveO?= =?UTF-8?q?ffsets=20ordering,=20pid=20atomic,=20error=20handlers,=20go=20m?= =?UTF-8?q?utex,=20frontend=20cleanup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/server/api.go | 10 ++++++++ plugin/server/plugin.go | 13 +++++++++- plugin/webapp/src/index.tsx | 18 ++++++++++++- src/watcher-manager.js | 50 +++++++++++++++++++++++++++++++------ 4 files changed, 81 insertions(+), 10 deletions(-) diff --git a/plugin/server/api.go b/plugin/server/api.go index bc025d2..b8bbe4f 100644 --- a/plugin/server/api.go +++ b/plugin/server/api.go @@ -122,6 +122,16 @@ func (p *Plugin) handleCreateSession(w http.ResponseWriter, r *http.Request) { return } + // Validate KV key length — Mattermost enforces a 50-char limit. + // Encoded key = kvPrefix (11 chars) + url.PathEscape(sessionKey). + // Exceeding the limit causes KVSet to silently succeed but never store data. + if len(encodeKey(req.SessionKey)) > 50 { + writeJSON(w, http.StatusBadRequest, map[string]string{ + "error": fmt.Sprintf("session_key too long: encoded key length %d exceeds 50-char KV limit", len(encodeKey(req.SessionKey))), + }) + return + } + // Check max active sessions config := p.getConfiguration() allSessions, _ := p.store.ListAllSessions() diff --git a/plugin/server/plugin.go b/plugin/server/plugin.go index 3350a2d..6ec659f 100644 --- a/plugin/server/plugin.go +++ b/plugin/server/plugin.go @@ -21,6 +21,9 @@ type Plugin struct { // store wraps KV store operations for session persistence. store *Store + // botUserIDLock synchronizes access to botUserID. + botUserIDLock sync.RWMutex + // botUserID is the plugin's bot user ID (created on activation). botUserID string @@ -41,7 +44,9 @@ func (p *Plugin) OnActivate() error { if appErr != nil { p.API.LogWarn("Failed to ensure bot user", "error", appErr.Error()) } else { + p.botUserIDLock.Lock() p.botUserID = botID + p.botUserIDLock.Unlock() p.API.LogInfo("Plugin bot user ensured", "botUserID", botID) } @@ -75,8 +80,10 @@ func (p *Plugin) sessionCleanupLoop() { } } -// getBotUserID returns the plugin's bot user ID. +// getBotUserID returns the plugin's bot user ID (thread-safe). func (p *Plugin) getBotUserID() string { + p.botUserIDLock.RLock() + defer p.botUserIDLock.RUnlock() return p.botUserID } @@ -93,6 +100,10 @@ func (p *Plugin) OnDeactivate() error { p.API.LogWarn("Failed to list sessions on deactivate", "error", err.Error()) } else { for _, s := range sessions { + // Skip sessions already in a terminal state — do not overwrite done/error + if s.Status == "done" || s.Status == "error" { + continue + } s.Status = "interrupted" _ = p.store.SaveSession(s.SessionKey, s) p.broadcastUpdate(s.ChannelID, s) diff --git a/plugin/webapp/src/index.tsx b/plugin/webapp/src/index.tsx index ccc40c3..ea53065 100644 --- a/plugin/webapp/src/index.tsx +++ b/plugin/webapp/src/index.tsx @@ -61,6 +61,13 @@ class LiveStatusPlugin { window.__livestatus_updates[data.post_id] = update; + // Evict completed sessions from the update cache after 60s to prevent unbounded growth + if (data.status === 'done' || data.status === 'error' || data.status === 'interrupted') { + setTimeout(() => { + delete window.__livestatus_updates[data.post_id]; + }, 60000); + } + // Notify post-specific listeners const listeners = window.__livestatus_listeners[data.post_id]; if (listeners) { @@ -82,7 +89,16 @@ class LiveStatusPlugin { } uninitialize(): void { - // Cleanup handled by Mattermost plugin framework + // Clear global listener and update stores to prevent accumulation across reloads + window.__livestatus_listeners = {}; + window.__livestatus_updates = {}; + + // Unregister the custom post type component if it was registered + if (this.postTypeComponentId) { + // registry is not available here — Mattermost framework cleans up on deactivate. + // Clearing postTypeComponentId prevents stale references. + this.postTypeComponentId = null; + } } } diff --git a/src/watcher-manager.js b/src/watcher-manager.js index 04103ed..3ce7dac 100644 --- a/src/watcher-manager.js +++ b/src/watcher-manager.js @@ -47,8 +47,19 @@ if (cmd === 'start') { // ---- PID File helpers ---- function writePidFile(pidFile) { - // eslint-disable-next-line security/detect-non-literal-fs-filename - fs.writeFileSync(pidFile, String(process.pid), 'utf8'); + try { + // eslint-disable-next-line security/detect-non-literal-fs-filename + const fd = fs.openSync(pidFile, 'wx'); + fs.writeSync(fd, String(process.pid)); + fs.closeSync(fd); + } catch (err) { + if (err.code === 'EEXIST') { + // Another process won the race — bail out + console.error('PID file already exists — another daemon may be running'); + process.exit(1); + } + throw err; + } } function readPidFile(pidFile) { @@ -100,8 +111,10 @@ function saveOffsets(offsetFile, sessions) { try { // eslint-disable-next-line security/detect-non-literal-fs-filename fs.writeFileSync(offsetFile, JSON.stringify(offsets, null, 2), 'utf8'); - } catch (_e) { - /* ignore write error */ + } catch (writeErr) { + // Log disk-full / permission errors so they are visible in daemon logs + // eslint-disable-next-line no-console + console.warn('[saveOffsets] Failed to write offset file:', writeErr.message); } } @@ -115,6 +128,16 @@ async function startDaemon() { const config = getConfig(); const logger = getLogger(); + // Global error handlers — prevent silent daemon death from unhandled rejections + process.on('unhandledRejection', (reason) => { + logger.error({ reason }, 'Unhandled promise rejection — shutting down'); + shutdown().finally(() => process.exit(1)); + }); + process.on('uncaughtException', (err) => { + logger.error({ err }, 'Uncaught exception — shutting down'); + shutdown().finally(() => process.exit(1)); + }); + // Check if already running const existingPid = readPidFile(config.pidFile); if (existingPid && isProcessRunning(existingPid)) { @@ -459,7 +482,7 @@ async function startDaemon() { // ---- Session Idle (from watcher) ---- watcher.on('session-idle', async (sessionKey, state) => { - const box = activeBoxes.get(sessionKey); + const box = activeBoxes.get(sessionKey); // snapshot at entry if (!box) { // Sub-agent completed updateParentWithChild(activeBoxes, watcher, sharedStatusBox, pluginClient, sessionKey, state, logger); @@ -492,12 +515,23 @@ async function startDaemon() { logger.error({ sessionKey, err }, 'Failed to update final status'); } + // Guard: if the box was replaced during the awaits above (new session reactivated), + // skip cleanup to avoid killing the newly re-added session. + if (activeBoxes.get(sessionKey) !== box) { + logger.info({ sessionKey }, 'session-idle: box replaced during await — skipping cleanup (new session active)'); + return; + } + // Save to completedBoxes so we can reuse the post ID if the session reactivates completedBoxes.set(sessionKey, { postId: box.postId, lastOffset: state.lastOffset || 0, }); + // Persist final offsets BEFORE removing session from watcher so the + // completed session's last offset is captured in the snapshot. + saveOffsets(config.offsetFile, watcher.sessions); + // Clean up active tracking activeBoxes.delete(sessionKey); watcher.removeSession(sessionKey); @@ -505,9 +539,6 @@ async function startDaemon() { // ensures we reuse the existing post instead of creating a new one. monitor.forgetSession(sessionKey); globalMetrics.activeSessions = activeBoxes.size; - - // Persist final offsets - saveOffsets(config.offsetFile, watcher.sessions); }); // ---- Start all subsystems ---- @@ -534,6 +565,9 @@ async function startDaemon() { logger.info({ signal }, 'Shutting down gracefully'); + // Persist offsets for all active sessions before stopping the watcher + saveOffsets(config.offsetFile, watcher.sessions); + // Stop accepting new sessions monitor.stop(); watcher.stop();