Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions actions/setup/js/copilot_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ const MAX_SCHEDULED_EXIT2_RETRIES = 1;
const PROMPT_FILE_INLINE_THRESHOLD_BYTES = 100 * 1024;
const PROMPT_FILE_INLINE_THRESHOLD_LABEL = "100KB";
const MAX_ENV_VAR_PREVIEW_LENGTH = 120;
const OUTPUT_TAIL_MAX_CHARS = 600;
const OUTPUT_TAIL_MAX_LINES = 12;
// Pattern to detect transient CAPIError 400 in copilot output
const CAPI_ERROR_400_PATTERN = /CAPIError:\s*400/;

Expand Down Expand Up @@ -112,6 +114,13 @@ const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[
const INFERENCE_ACCESS_ERROR_PATTERN = /Access denied by policy settings|invalid access to inference/;
// Pattern: Agentic engine process killed by signal (timeout)
const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/;
// Pattern: Copilot SDK driver timed out waiting for the session to become idle.
const SDK_SESSION_IDLE_TIMEOUT_PATTERN = /Timeout after \d+ms waiting for session\.idle/;
// Pattern: MCP gateway shutdown surfaced in agent output.
// Anchored to the JSON "message" key emitted by the MCP gateway driver to
// avoid false positives from any process that logs "Gateway shutdown initiated"
// as plain text.
const MCP_GATEWAY_SHUTDOWN_PATTERN = /"message"\s*:\s*"Gateway shutdown initiated"/;

// Pattern to detect null-type tool_call error that poisons conversation history.
// Matches the Copilot API 400 error:
Expand Down Expand Up @@ -260,6 +269,82 @@ function isAuthenticationFailedError(output) {
return AUTHENTICATION_FAILED_PATTERN.test(output);
}

/**
 * Reports whether the collected output shows a Copilot SDK `session.idle` timeout.
 * @param {string} output - Collected process output to scan.
 * @returns {boolean} True when SDK_SESSION_IDLE_TIMEOUT_PATTERN matches somewhere in the output.
 */
function isSDKSessionIdleTimeoutError(output) {
  const sawIdleTimeout = SDK_SESSION_IDLE_TIMEOUT_PATTERN.test(output);
  return sawIdleTimeout;
}

/**
 * Reports whether the collected output includes an MCP gateway shutdown message.
 * @param {string} output - Collected process output to scan.
 * @returns {boolean} True when MCP_GATEWAY_SHUTDOWN_PATTERN matches somewhere in the output.
 */
function isMCPGatewayShutdownError(output) {
  const sawGatewayShutdown = MCP_GATEWAY_SHUTDOWN_PATTERN.test(output);
  return sawGatewayShutdown;
}

/**
* Extract a compact tail preview from combined process output for failure logs.
* @param {string} output
* @param {{ maxChars?: number, maxLines?: number }} [options]
* @returns {string}
*/
function extractOutputTail(output, options) {
if (typeof output !== "string" || !output) return "";
const maxChars = options?.maxChars ?? OUTPUT_TAIL_MAX_CHARS;
const maxLines = options?.maxLines ?? OUTPUT_TAIL_MAX_LINES;
const normalized = output.replace(/\0/g, "").replace(/\r\n/g, "\n").replace(/\r/g, "\n").trim();
if (!normalized) return "";
// filter(Boolean) removes empty strings from blank lines after trimEnd(); maxLines therefore counts non-empty lines.
const tailLines = normalized
.split("\n")
.map(line => line.trimEnd())
.filter(Boolean)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

filter(Boolean) removes blank lines before slice(-maxLines), so maxLines counts non-empty lines, not total lines — callers silently get fewer context lines than requested.

💡 Details
.filter(Boolean)      // blank lines dropped here
.slice(-maxLines);    // maxLines applied to surviving lines only

If the last section of output is:

Error: timeout\n\n\nStack trace line 1\nStack trace line 2

...after filter(Boolean) you have 3 non-empty lines. A caller requesting maxLines: 2 sees only the two stack trace lines with no indication that the separating blank lines — and thus the visual structure of the error — were dropped.

If blank lines should be preserved, remove filter(Boolean). If they should be collapsed, document that maxLines applies after filtering so callers aren't surprised by unexpectedly thin output.

.slice(-maxLines);
if (tailLines.length === 0) return "";
let tail = tailLines.join("\n");
if (tail.length > maxChars) {
const keep = maxChars - 1;
tail = keep > 0 ? `…${tail.slice(-keep)}` : "…";
}
return tail;
}

/**
 * Map the detection flags of a failed Copilot attempt to one short failure-class label.
 *
 * Flags are evaluated in a fixed priority order and the first truthy flag decides the
 * label. When no flag is set, the label depends only on whether the attempt produced output.
 * @param {{
 *   hasOutput: boolean,
 *   isAuthErr?: boolean,
 *   isAuthenticationFailed?: boolean,
 *   isTransientCAPIError?: boolean,
 *   isMCPGatewayShutdown?: boolean,
 *   isMCPPolicy?: boolean,
 *   isModelNotSupported?: boolean,
 *   isNullTypeToolCall?: boolean,
 *   isQuotaExceeded?: boolean,
 *   isSDKSessionIdleTimeout?: boolean,
 *   hasNumerousPermissionDenied?: boolean,
 * }} detection
 * @returns {string}
 */
function classifyCopilotFailure(detection) {
  // Highest priority first; order matters when several flags are set at once.
  const rankedClasses = [
    ["isQuotaExceeded", "capi_quota_exceeded"],
    ["isMCPPolicy", "mcp_policy_blocked"],
    ["isModelNotSupported", "model_not_supported"],
    ["isNullTypeToolCall", "null_type_tool_call"],
    ["isAuthErr", "no_auth_info"],
    ["isAuthenticationFailed", "authentication_failed"],
    ["isSDKSessionIdleTimeout", "sdk_session_idle_timeout"],
    ["isMCPGatewayShutdown", "mcp_gateway_shutdown"],
    ["hasNumerousPermissionDenied", "permission_denied"],
    ["isTransientCAPIError", "capi_error_400"],
  ];
  for (const [flagName, failureClass] of rankedClasses) {
    if (detection[flagName]) {
      return failureClass;
    }
  }
  // No specific signal matched: fall back on whether anything was emitted at all.
  if (detection.hasOutput) {
    return "partial_execution";
  }
  return "no_output";
}

/**
* Extract provider auth failure details from Copilot output when available.
* @param {string} output
Expand Down Expand Up @@ -714,23 +799,45 @@ async function main() {
const isAuthenticationFailed = isAuthenticationFailedError(result.output);
const proxyAuthDiagnostic = buildCopilotProxyAuthFailureDiagnostic(result.output, process.env);
const isNullTypeToolCall = isNullTypeToolCallError(result.output);
const isSDKSessionIdleTimeout = isSDKSessionIdleTimeoutError(result.output);
const isMCPGatewayShutdown = isMCPGatewayShutdownError(result.output);
const permissionDeniedCount = countPermissionDeniedIssues(result.output);
const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output);
const failureClass = classifyCopilotFailure({
hasOutput: result.hasOutput,
isAuthErr,
isAuthenticationFailed,
isTransientCAPIError: isCAPIError,
isMCPGatewayShutdown,
isMCPPolicy,
isModelNotSupported,
isNullTypeToolCall,
isQuotaExceeded,
isSDKSessionIdleTimeout,
hasNumerousPermissionDenied,
});
const outputTail = extractOutputTail(result.output);
log(
`attempt ${attempt + 1} failed:` +
` exitCode=${result.exitCode}` +
` failureClass=${failureClass}` +
` isCAPIError400=${isCAPIError}` +
` isCAPIQuotaExceededError=${isQuotaExceeded}` +
` isMCPPolicyError=${isMCPPolicy}` +
` isModelNotSupportedError=${isModelNotSupported}` +
` isNullTypeToolCallError=${isNullTypeToolCall}` +
` isSDKSessionIdleTimeoutError=${isSDKSessionIdleTimeout}` +
` isMCPGatewayShutdownError=${isMCPGatewayShutdown}` +
` isAuthError=${isAuthErr}` +
` isAuthenticationFailedError=${isAuthenticationFailed}` +
` permissionDeniedCount=${permissionDeniedCount}` +
` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` +
` hasOutput=${result.hasOutput}` +
` retriesRemaining=${MAX_RETRIES - attempt}`
);
if (outputTail) {
log(`attempt ${attempt + 1}: outputTail=${JSON.stringify(outputTail)}`);
}

// If a noop was written to safe-outputs during the failed run, the agent determined
// there was nothing to do (or the user indicated so before the agent ran). Retrying
Expand Down Expand Up @@ -908,11 +1015,15 @@ if (typeof module !== "undefined" && module.exports) {
resolveCopilotSDKCustomProviderFromReflect,
countPermissionDeniedIssues,
detectCopilotErrors,
classifyCopilotFailure,
extractOutputTail,
hasNumerousPermissionDeniedIssues,
INFERENCE_ACCESS_ERROR_PATTERN,
AGENTIC_ENGINE_TIMEOUT_PATTERN,
buildMissingToolPermissionIssuePayload,
isAuthenticationFailedError,
isMCPGatewayShutdownError,
isSDKSessionIdleTimeoutError,
startCopilotSDKServer,
stopCopilotSDKServer,
waitForCopilotSDKServer,
Expand Down
92 changes: 90 additions & 2 deletions actions/setup/js/copilot_harness.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const { buildCopilotSDKEnv, isCopilotSDKEnabled } = require("./process_runner.cj
const {
appendSafeOutputLine,
buildMissingToolPermissionIssuePayload,
classifyCopilotFailure,
buildMissingToolAlternatives,
buildInfrastructureIncompletePayload,
buildCopilotProxyAuthFailureDiagnostic,
Expand All @@ -21,13 +22,15 @@ const {
detectCopilotErrors,
emitInfrastructureIncomplete,
emitMissingToolPermissionIssue,
extractOutputTail,
extractDeniedCommands,
hasNumerousPermissionDeniedIssues,
hasNoopInSafeOutputs,
INFERENCE_ACCESS_ERROR_PATTERN,
AGENTIC_ENGINE_TIMEOUT_PATTERN,
isDetectionPhase,
isAuthenticationFailedError,
isMCPGatewayShutdownError,
isModelAvailableInReflectData,
isModelAvailableInReflectFile,
resolveCopilotSDKCustomProviderFromReflect,
Expand All @@ -38,6 +41,7 @@ const {
generateCopilotConnectionToken,
GEMINI_MODEL_NAME_PREFIX,
isCAPIQuotaExceededError,
isSDKSessionIdleTimeoutError,
PROMPT_FILE_INLINE_THRESHOLD_BYTES,
resolvePromptFileArgs,
writeCopilotOutputs,
Expand Down Expand Up @@ -81,8 +85,9 @@ describe("copilot_harness.cjs", () => {
expect(isCAPIQuotaExceededError("CAPIError: 400 Bad Request")).toBe(false);
});

it("does not match generic 429 output without the observed quota-exceeded message", () => {
expect(isCAPIQuotaExceededError("CAPIError: 429 Too Many Requests")).toBe(false);
it("matches Copilot/CAPI 429 Too Many Requests output", () => {
expect(isCAPIQuotaExceededError("CAPIError: 429 Too Many Requests")).toBe(true);
expect(isCAPIQuotaExceededError("Last error: CAPIError: Too Many Requests")).toBe(true);
});

it("does not match unrelated errors", () => {
Expand Down Expand Up @@ -199,6 +204,16 @@ describe("copilot_harness.cjs", () => {
expect(shouldRetry(result, 0)).toBe(false);
});

it("does not retry Copilot/CAPI Too Many Requests output", () => {
const result = {
exitCode: 1,
hasOutput: true,
output: "Failed to get response from the AI model; retried 5 times. Last error: CAPIError: Too Many Requests",
};

expect(shouldRetry(result, 0)).toBe(false);
});

it("still retries generic partial-execution errors with output", () => {
const result = {
exitCode: 1,
Expand Down Expand Up @@ -239,6 +254,79 @@ describe("copilot_harness.cjs", () => {
expect(shouldRetry(result, 1, true, 1)).toBe(false);
});

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] The new tests confirm that sdk_session_idle_timeout and mcp_gateway_shutdown are correctly named, but there are no shouldRetry tests verifying what the harness does with them at retry time. Per the PR description, both classes intentionally fall through to the partial_execution retry path when hasOutput: true. Without an explicit test, a future guard clause that accidentally makes them non-retryable would go undetected.

💡 Suggested regression tests
it("retries SDK session.idle timeout as partial execution", () => {
  const result = {
    exitCode: 1,
    hasOutput: true,
    output: "[copilot-sdk-driver] Timeout after 60000ms waiting for session.idle",
  };
  expect(shouldRetry(result, 0)).toBe(true);
});

it("retries MCP gateway shutdown as partial execution", () => {
  const result = {
    exitCode: 1,
    hasOutput: true,
    output: '{"message":"Gateway shutdown initiated","serversTerminated":1,"status":"closed"}',
  };
  expect(shouldRetry(result, 0)).toBe(true);
});

These tests serve as executable documentation of the intentional retry-through behaviour described in the PR.


describe("failure classification helpers", () => {
  it("classifies Copilot SDK session.idle timeouts distinctly", () => {
    const output = "[copilot-sdk-driver] Timeout after 60000ms waiting for session.idle";
    expect(isSDKSessionIdleTimeoutError(output)).toBe(true);
    expect(classifyCopilotFailure({ hasOutput: true, isSDKSessionIdleTimeout: true })).toBe("sdk_session_idle_timeout");
  });

  it("classifies MCP gateway shutdown distinctly when present in output", () => {
    const output = 'Response: {"message":"Gateway shutdown initiated","serversTerminated":2,"status":"closed"}';
    expect(isMCPGatewayShutdownError(output)).toBe(true);
    expect(classifyCopilotFailure({ hasOutput: true, isMCPGatewayShutdown: true })).toBe("mcp_gateway_shutdown");
  });

  it("sdk_session_idle_timeout outranks permission_denied in failure classification", () => {
    // Both flags set — the more specific signal must win.
    expect(classifyCopilotFailure({ hasOutput: true, isSDKSessionIdleTimeout: true, hasNumerousPermissionDenied: true })).toBe("sdk_session_idle_timeout");
  });

  it("mcp_gateway_shutdown outranks permission_denied in failure classification", () => {
    // Both flags set — the more specific signal must win.
    expect(classifyCopilotFailure({ hasOutput: true, isMCPGatewayShutdown: true, hasNumerousPermissionDenied: true })).toBe("mcp_gateway_shutdown");
  });

  it("retries sdk_session_idle_timeout as partial execution (shouldRetry)", () => {
    // Call the harness's real shouldRetry rather than a local copy, so that a future
    // guard clause making this class non-retryable fails this test.
    const result = {
      exitCode: 1,
      hasOutput: true,
      output: "[copilot-sdk-driver] Timeout after 60000ms waiting for session.idle",
    };
    expect(shouldRetry(result, 0)).toBe(true);
  });

  it("retries mcp_gateway_shutdown as partial execution (shouldRetry)", () => {
    // Call the harness's real shouldRetry rather than a local copy, so that a future
    // guard clause making this class non-retryable fails this test.
    const result = {
      exitCode: 1,
      hasOutput: true,
      output: '{"message":"Gateway shutdown initiated","serversTerminated":1,"status":"closed"}',
    };
    expect(shouldRetry(result, 0)).toBe(true);
  });

  it("extractOutputTail never exceeds maxChars even when maxChars is 1", () => {
    const tail = extractOutputTail("abc", { maxLines: 5, maxChars: 1 });
    expect(tail.length).toBeLessThanOrEqual(1);
  });

  it("extracts a compact tail preview from large output", () => {
    // maxLines keeps only the last two non-empty lines.
    const tail = extractOutputTail(["line 1", "line 2", "line 3", "line 4"].join("\n"), { maxLines: 2, maxChars: 20 });
    expect(tail).toBe("line 3\nline 4");
  });

  it("truncates very large output tails from the front", () => {
    // The ellipsis takes one of the maxChars slots, leaving 15 characters of payload.
    const tail = extractOutputTail(`prefix\n${"x".repeat(40)}`, { maxLines: 5, maxChars: 16 });
    expect(tail).toBe(`…${"x".repeat(15)}`);
  });
});

it("does not claim a retry when already at max retry attempt", () => {
const result = { exitCode: 2, hasOutput: false };
expect(shouldRetry(result, MAX_RETRIES, true, 0)).toBe(false);
Expand Down
17 changes: 12 additions & 5 deletions actions/setup/js/detect_agent_errors.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
* for the selected engine/account (for example unknown model name, model not
* found, or model unavailable for the plan).
* - capi_quota_exceeded_error: The Copilot CAPI quota has been exhausted
* (e.g., "CAPIError: 429 429 quota exceeded").
* or rate-limited (e.g., "CAPIError: 429 429 quota exceeded",
* "CAPIError: Too Many Requests"). All matched forms are treated as
* non-retryable because the Copilot SDK has already retried internally
* before surfacing the error.
*
* This replaces the individual bash scripts (detect_inference_access_error.sh,
* detect_mcp_policy_error.sh) with a single JavaScript step.
Expand Down Expand Up @@ -57,10 +60,14 @@ const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/;
const MODEL_NOT_SUPPORTED_PATTERN =
/(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable))/i;

// Pattern: Copilot/CAPI quota exhaustion.
// Matches the observed error: "CAPIError: 429 429 quota exceeded".
// Quota exhaustion is a persistent, non-retryable condition.
const CAPI_QUOTA_EXCEEDED_PATTERN = /CAPIError:\s*429\s+429\s+quota exceeded/i;
// Pattern: Copilot/CAPI quota exhaustion and rate-limit responses.
// Matches all observed forms:
// "CAPIError: 429 429 quota exceeded" (original observed form)
// "CAPIError: 429 Too Many Requests" (HTTP 429 form)
// "CAPIError: Too Many Requests" (no status code in message)
// All forms are treated as non-retryable; the Copilot SDK has already retried
// internally before surfacing this error (evidenced by "retried 5 times" context).
const CAPI_QUOTA_EXCEEDED_PATTERN = /CAPIError:\s*(?:429\s+)?(?:429\s+quota exceeded|Too Many Requests)/i;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CAPI_QUOTA_EXCEEDED_PATTERN now treats any CAPIError: Too Many Requests as non-retryable quota exhaustion — a transient throttle will burn the entire retry budget.

💡 Details and suggested fix

Old (narrow):
```js
/CAPIError:\s*429\s+429\s+quota exceeded/i
```

New (broad):
```js
/CAPIError:\s*(?:429\s+)?(?:429\s+quota exceeded|Too Many Requests)/i
```

A standard HTTP 429 from a transiently rate-limited backend is now indistinguishable from true per-account quota exhaustion. Since isQuotaExceeded triggers an immediate break with no retries, a single transient 429 will abort all remaining attempts — the opposite of the intended retry behavior for recoverable conditions.

The test string "retried 5 times. Last error: CAPIError: Too Many Requests" contains the SDK's internal retry count as evidence of persistence, but the regex matches CAPIError: Too Many Requests anywhere in output with no such context requirement.

Options:

  1. Separate the two semantics: add a distinct isTransientRateLimitError that matches bare Too Many Requests and retries with back-off, while reserving capi_quota_exceeded for quota exceeded forms.
  2. Accept the broadening but update the inline comment (line 61–62) and the module-level JSDoc (line 20) to explicitly document that both throttling and quota exhaustion are treated as non-retryable — so the trade-off is visible to future reviewers.


/**
* Determines if the collected output contains the observed Copilot/CAPI quota exhaustion error.
Expand Down
5 changes: 3 additions & 2 deletions actions/setup/js/detect_agent_errors.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,9 @@ describe("detect_agent_errors.cjs", () => {
expect(isCAPIQuotaExceededError("CAPIError: 429 429 QUOTA EXCEEDED")).toBe(true);
});

it("does not match other CAPIError 429 messages", () => {
expect(isCAPIQuotaExceededError("CAPIError: 429 Too Many Requests")).toBe(false);
it("matches Copilot/CAPI Too Many Requests output", () => {
expect(isCAPIQuotaExceededError("CAPIError: 429 Too Many Requests")).toBe(true);
expect(isCAPIQuotaExceededError("Last error: CAPIError: Too Many Requests")).toBe(true);
});

it("does not match CAPIError 400", () => {
Expand Down
Loading