fix(batch3): HTTP timeout, transient 429 handling, lock file poll fallback
This commit is contained in:
@@ -311,6 +311,10 @@ class StatusBox extends EventEmitter {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
req.setTimeout(30000, () => {
|
||||||
|
req.destroy(new Error('HTTP request timed out after 30s'));
|
||||||
|
});
|
||||||
|
|
||||||
req.on('error', reject);
|
req.on('error', reject);
|
||||||
if (bodyStr) req.write(bodyStr);
|
if (bodyStr) req.write(bodyStr);
|
||||||
req.end();
|
req.end();
|
||||||
|
|||||||
@@ -83,6 +83,9 @@ class StatusWatcher extends EventEmitter {
|
|||||||
children: [],
|
children: [],
|
||||||
idleTimer: null,
|
idleTimer: null,
|
||||||
_lineBuffer: '',
|
_lineBuffer: '',
|
||||||
|
_lockActive: false,
|
||||||
|
_lockExists: undefined,
|
||||||
|
_lockPollTimer: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
this.sessions.set(sessionKey, state);
|
this.sessions.set(sessionKey, state);
|
||||||
@@ -99,6 +102,9 @@ class StatusWatcher extends EventEmitter {
|
|||||||
|
|
||||||
// Start file polling as fallback (fs.watch may not work on bind mounts in Docker)
|
// Start file polling as fallback (fs.watch may not work on bind mounts in Docker)
|
||||||
this._startFilePoll(sessionKey, state);
|
this._startFilePoll(sessionKey, state);
|
||||||
|
|
||||||
|
// Start lock file poll fallback (inotify misses on Docker bind mounts)
|
||||||
|
this._startLockPoll(sessionKey, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -131,6 +137,60 @@ class StatusWatcher extends EventEmitter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start polling a lock file for changes (fallback for fs.watch on Docker bind mounts).
|
||||||
|
* Idempotent with fs.watch — if fs.watch already fired, _lockActive is already updated
|
||||||
|
* and no duplicate events are emitted.
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
_startLockPoll(sessionKey, state) {
|
||||||
|
var self = this;
|
||||||
|
var lockFile = state.transcriptFile + '.lock';
|
||||||
|
|
||||||
|
state._lockPollTimer = setInterval(function () {
|
||||||
|
try {
|
||||||
|
var exists = fs.existsSync(lockFile);
|
||||||
|
|
||||||
|
// First poll: just initialize state, don't emit
|
||||||
|
if (state._lockExists === undefined) {
|
||||||
|
state._lockExists = exists;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
var changed = exists !== state._lockExists;
|
||||||
|
state._lockExists = exists;
|
||||||
|
|
||||||
|
if (!changed) return;
|
||||||
|
|
||||||
|
if (exists) {
|
||||||
|
// Lock file appeared — session activated by user message
|
||||||
|
if (!state._lockActive) {
|
||||||
|
state._lockActive = true;
|
||||||
|
if (self.logger) {
|
||||||
|
self.logger.info({ sessionKey }, 'Lock poll: lock file appeared — emitting session-lock');
|
||||||
|
}
|
||||||
|
self.emit('session-lock', sessionKey);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Lock file disappeared — turn complete
|
||||||
|
if (state._lockActive) {
|
||||||
|
state._lockActive = false;
|
||||||
|
if (self.logger) {
|
||||||
|
self.logger.info({ sessionKey }, 'Lock poll: lock file removed — emitting session-lock-released');
|
||||||
|
}
|
||||||
|
self.emit('session-lock-released', sessionKey);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (_e) {
|
||||||
|
// Ignore stat errors (file/dir may not exist yet)
|
||||||
|
}
|
||||||
|
}, 1000);
|
||||||
|
|
||||||
|
if (self.logger) {
|
||||||
|
self.logger.info({ sessionKey }, 'Lock poll timer started');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove a session from watching.
|
* Remove a session from watching.
|
||||||
* @param {string} sessionKey
|
* @param {string} sessionKey
|
||||||
@@ -141,6 +201,7 @@ class StatusWatcher extends EventEmitter {
|
|||||||
|
|
||||||
if (state.idleTimer) clearTimeout(state.idleTimer);
|
if (state.idleTimer) clearTimeout(state.idleTimer);
|
||||||
if (state._filePollTimer) clearInterval(state._filePollTimer);
|
if (state._filePollTimer) clearInterval(state._filePollTimer);
|
||||||
|
if (state._lockPollTimer) clearInterval(state._lockPollTimer);
|
||||||
|
|
||||||
// Keep fileToSession mapping alive so fs.watch still fires for this file.
|
// Keep fileToSession mapping alive so fs.watch still fires for this file.
|
||||||
// Mark it as a "ghost" — changes trigger 'session-file-changed' so the
|
// Mark it as a "ghost" — changes trigger 'session-file-changed' so the
|
||||||
@@ -212,6 +273,10 @@ class StatusWatcher extends EventEmitter {
|
|||||||
clearInterval(state._filePollTimer);
|
clearInterval(state._filePollTimer);
|
||||||
state._filePollTimer = null;
|
state._filePollTimer = null;
|
||||||
}
|
}
|
||||||
|
if (state._lockPollTimer) {
|
||||||
|
clearInterval(state._lockPollTimer);
|
||||||
|
state._lockPollTimer = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (this.logger) this.logger.info('StatusWatcher stopped');
|
if (this.logger) this.logger.info('StatusWatcher stopped');
|
||||||
}
|
}
|
||||||
@@ -237,6 +302,12 @@ class StatusWatcher extends EventEmitter {
|
|||||||
// Lock file CREATED — gateway started processing user message.
|
// Lock file CREATED — gateway started processing user message.
|
||||||
// Earliest possible signal: fires before any JSONL write.
|
// Earliest possible signal: fires before any JSONL write.
|
||||||
if (sessionKey) {
|
if (sessionKey) {
|
||||||
|
const sessionState = this.sessions.get(sessionKey);
|
||||||
|
// Sync _lockActive so poll fallback stays idempotent
|
||||||
|
if (sessionState && !sessionState._lockActive) {
|
||||||
|
sessionState._lockActive = true;
|
||||||
|
sessionState._lockExists = true;
|
||||||
|
}
|
||||||
if (this.logger) this.logger.info({ sessionKey }, 'Lock file created — session active, triggering early reactivation');
|
if (this.logger) this.logger.info({ sessionKey }, 'Lock file created — session active, triggering early reactivation');
|
||||||
this.emit('session-lock', sessionKey);
|
this.emit('session-lock', sessionKey);
|
||||||
} else {
|
} else {
|
||||||
@@ -246,6 +317,12 @@ class StatusWatcher extends EventEmitter {
|
|||||||
// Lock file DELETED — gateway finished the turn and sent final reply.
|
// Lock file DELETED — gateway finished the turn and sent final reply.
|
||||||
// Immediate idle signal: no need to wait for cache-ttl or 60s timeout.
|
// Immediate idle signal: no need to wait for cache-ttl or 60s timeout.
|
||||||
if (sessionKey) {
|
if (sessionKey) {
|
||||||
|
const sessionState = this.sessions.get(sessionKey);
|
||||||
|
// Sync _lockActive so poll fallback stays idempotent
|
||||||
|
if (sessionState && sessionState._lockActive) {
|
||||||
|
sessionState._lockActive = false;
|
||||||
|
sessionState._lockExists = false;
|
||||||
|
}
|
||||||
if (this.logger) this.logger.info({ sessionKey }, 'Lock file deleted — turn complete, marking session done immediately');
|
if (this.logger) this.logger.info({ sessionKey }, 'Lock file deleted — turn complete, marking session done immediately');
|
||||||
this.emit('session-lock-released', sessionKey);
|
this.emit('session-lock-released', sessionKey);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -462,11 +462,17 @@ async function startDaemon() {
|
|||||||
}),
|
}),
|
||||||
start_time_ms: state.startTime,
|
start_time_ms: state.startTime,
|
||||||
}).catch(function (err) {
|
}).catch(function (err) {
|
||||||
// If plugin rejects (e.g. session not found), fall back to REST for this session
|
// Only permanently disable plugin mode for hard failures (non-retryable errors).
|
||||||
logger.warn({ sessionKey, err: err.message }, 'Plugin update failed — falling back to REST');
|
// 429 and 5xx are transient — keep plugin mode and retry on next update.
|
||||||
box.usePlugin = false;
|
if (err.statusCode === 429 || (err.statusCode >= 500 && err.statusCode < 600)) {
|
||||||
var fallbackText = buildStatusText(box, state, activeBoxes, watcher, sessionKey);
|
logger.warn({ sessionKey, statusCode: err.statusCode }, 'Plugin API transient error — keeping plugin mode, will retry next update');
|
||||||
sharedStatusBox.updatePost(box.postId, fallbackText).catch(function () {});
|
// do NOT set box.usePlugin = false
|
||||||
|
} else {
|
||||||
|
logger.warn({ sessionKey, err: err.message }, 'Plugin API hard failure — falling back to REST');
|
||||||
|
box.usePlugin = false;
|
||||||
|
var fallbackText = buildStatusText(box, state, activeBoxes, watcher, sessionKey);
|
||||||
|
sharedStatusBox.updatePost(box.postId, fallbackText).catch(function () {});
|
||||||
|
}
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// REST API fallback: format text and PUT update post
|
// REST API fallback: format text and PUT update post
|
||||||
|
|||||||
Reference in New Issue
Block a user