From 25c5fb685553660c7653fd89099b283d8a7049f6 Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 3 May 2026 14:28:48 +0100 Subject: [PATCH 1/2] test(ci): stronger diagnostics for silent backend-test exit PR #7663 added unhandledRejection / uncaughtException handlers in common.ts. The next failure after merge (run 25279692065 - Windows without plugins, Node 24) showed mocha exiting with code 1 mid-suite 261ms after the last passing test, with NEITHER handler firing. So something more drastic is killing the process - SIGKILL, OOM, fatal native error - or mocha itself called process.exit before the JS handlers in common.ts could run. Two issues with the previous attempt: 1. Handlers in common.ts only register when a spec imports common.ts. Only 27 of 47 specs do. If a non-common spec triggers the death, handlers may never have been registered. 2. process.stderr.write is asynchronous on Windows when stderr is piped (which it is under GitHub Actions). On a hard kill the buffered line never reaches the runner log. This patch: - Moves diagnostic handlers to a dedicated tests/backend/diagnostics.ts loaded via mocha --require, so they register at startup before any spec runs. - Uses fs.writeSync(2, ...) for synchronous stderr writes that the kernel completes before returning - the line lands in the log even if the process is killed milliseconds later. - Adds beforeExit / exit / signal handlers so we can discriminate the exit mechanism: clean drain vs process.exit vs SIGKILL vs signal. - Tracks last-seen test via mocha root afterEach hook so the death point is visible in the log. The next CI failure should print enough context to identify the cause, after which we can fix the real bug and drop this file. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/package.json | 2 +- src/tests/backend/diagnostics.ts | 93 ++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 src/tests/backend/diagnostics.ts diff --git a/src/package.json b/src/package.json index e60c37065f5..13b969d2510 100644 --- a/src/package.json +++ b/src/package.json @@ -143,7 +143,7 @@ }, "scripts": { "lint": "eslint .", - "test": "cross-env NODE_ENV=production mocha --import=tsx --timeout 120000 --recursive tests/backend/specs/**.ts ../node_modules/ep_*/static/tests/backend/specs/**", + "test": "cross-env NODE_ENV=production mocha --import=tsx --require ./tests/backend/diagnostics.ts --timeout 120000 --recursive tests/backend/specs/**.ts ../node_modules/ep_*/static/tests/backend/specs/**", "test-utils": "cross-env NODE_ENV=production mocha --import=tsx --timeout 5000 --recursive tests/backend/specs/*utils.ts", "test-container": "mocha --import=tsx --timeout 5000 tests/container/specs/api", "dev": "cross-env NODE_ENV=development node --require tsx/cjs node/server.ts", diff --git a/src/tests/backend/diagnostics.ts b/src/tests/backend/diagnostics.ts new file mode 100644 index 00000000000..912a83d089e --- /dev/null +++ b/src/tests/backend/diagnostics.ts @@ -0,0 +1,93 @@ +'use strict'; + +// Diagnostic-only mocha bootstrap, loaded via `mocha --require ./tests/backend/diagnostics.ts`. +// +// PR #7663 added unhandledRejection / uncaughtException handlers in +// tests/backend/common.ts to surface the silent ~22% backend-test flake. +// The next failure (run 25279692065, Windows without plugins, Node 24) +// showed mocha exit with code 1 mid-suite, 261ms after the last passing +// test, with NEITHER handler firing. This means the process was killed +// in a way that bypassed JS handlers — SIGKILL, OOM, or a fatal native +// error — OR mocha itself called process.exit before the handlers ran. +// +// This file: +// 1. Registers handlers UNCONDITIONALLY at mocha startup (common.ts is +// only imported by ~27 of 47 specs, so its handlers may register +// late or after a death-causing event). +// 2. Writes via fs.writeSync(2, ...) — synchronous stderr writes that +// complete before the kernel returns from the syscall, so the line +// lands in the runner log even if the process is killed +// milliseconds later. +// 3. Tracks the last-seen test via a mocha root afterEach hook so the +// death point is identified. +// 4. Logs exit-related events so we can discriminate: +// beforeExit + exit -> clean event-loop drain +// only exit -> process.exit() called somewhere +// neither -> hard kill (SIGKILL/OOM/runner) +// signal lines -> SIGTERM / SIGINT / SIGBREAK received +// +// Drop this file once the flake's root cause is identified and fixed. + +import {writeSync} from 'node:fs'; + +const t0 = Date.now(); +let lastSeenTest = ''; + +const diag = (msg: string): void => { + const line = `[diag +${Date.now() - t0}ms] ${msg}\n`; + try { + writeSync(2, line); + } catch (_) { + // Best-effort: if stderr is closed there is nothing we can do. + } +}; + +diag('diagnostics loaded'); + +process.on('unhandledRejection', (reason: any) => { + diag(`unhandledRejection: ${ + reason && reason.stack ? reason.stack : String(reason) + } (lastTest="${lastSeenTest}")`); + // Re-throw so existing common.ts handlers / mocha behavior is preserved. + throw reason; +}); + +process.on('uncaughtException', (err: any) => { + diag(`uncaughtException: ${ + err && err.stack ? err.stack : String(err) + } (lastTest="${lastSeenTest}")`); + // Don't process.exit here — let mocha or the existing common.ts handler + // decide. We just want the diagnostic line out first. +}); + +process.on('beforeExit', (code: number) => { + diag(`beforeExit code=${code} exitCode=${process.exitCode} ` + + `lastTest="${lastSeenTest}"`); +}); + +process.on('exit', (code: number) => { + diag(`exit code=${code} lastTest="${lastSeenTest}"`); +}); + +for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP', 'SIGBREAK'] as const) { + // SIGHUP / SIGBREAK don't exist on every platform; ignore registration errors. + try { + process.on(sig as any, () => { + diag(`received ${sig} (lastTest="${lastSeenTest}")`); + // Let the default behavior (exit) happen. + process.exit(128); + }); + } catch (_) { + // ignore + } +} + +// Mocha root hook — only registered if mocha picks up this file via --require. +// We track the most recently-finished test so the death point is visible. +export const mochaHooks = { + afterEach(this: any) { + if (this.currentTest) { + lastSeenTest = this.currentTest.fullTitle(); + } + }, +}; From b52bd2abfd9b8dd222bf0d72fe9cc79362b0d928 Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 3 May 2026 14:40:07 +0100 Subject: [PATCH 2/2] fix(diagnostics): exit(1) on uncaughtException so fatal errors fail fast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Qodo flagged on PR #7665: the uncaughtException handler in tests/backend/diagnostics.ts only logged and returned. Once a handler is registered, Node no longer exits on its own. Specs that don't import tests/backend/common.ts (20 of 47) have only this handler — so a fatal error would have been swallowed and tests would limp along instead of failing fast. Mirror common.ts and call process.exit(1) after logging. --- src/tests/backend/diagnostics.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/tests/backend/diagnostics.ts b/src/tests/backend/diagnostics.ts index 912a83d089e..ac29ab1980c 100644 --- a/src/tests/backend/diagnostics.ts +++ b/src/tests/backend/diagnostics.ts @@ -56,8 +56,11 @@ process.on('uncaughtException', (err: any) => { diag(`uncaughtException: ${ err && err.stack ? err.stack : String(err) } (lastTest="${lastSeenTest}")`); - // Don't process.exit here — let mocha or the existing common.ts handler - // decide. We just want the diagnostic line out first. + // Force fail-fast. Specs that don't import common.ts only have THIS handler, + // and Node won't exit on its own once an uncaughtException listener is + // registered. Without the explicit exit a fatal error would be swallowed. + // common.ts has the same process.exit(1); whichever handler runs first wins. + process.exit(1); }); process.on('beforeExit', (code: number) => {