-
Notifications
You must be signed in to change notification settings - Fork 427
Improve Copilot harness classification for opaque exitCode=1 failures #39959
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
de3cb2f
89f2e16
4aa981c
502d97b
d83ab10
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ const { buildCopilotSDKEnv, isCopilotSDKEnabled } = require("./process_runner.cj | |
| const { | ||
| appendSafeOutputLine, | ||
| buildMissingToolPermissionIssuePayload, | ||
| classifyCopilotFailure, | ||
| buildMissingToolAlternatives, | ||
| buildInfrastructureIncompletePayload, | ||
| buildCopilotProxyAuthFailureDiagnostic, | ||
|
|
@@ -21,13 +22,15 @@ const { | |
| detectCopilotErrors, | ||
| emitInfrastructureIncomplete, | ||
| emitMissingToolPermissionIssue, | ||
| extractOutputTail, | ||
| extractDeniedCommands, | ||
| hasNumerousPermissionDeniedIssues, | ||
| hasNoopInSafeOutputs, | ||
| INFERENCE_ACCESS_ERROR_PATTERN, | ||
| AGENTIC_ENGINE_TIMEOUT_PATTERN, | ||
| isDetectionPhase, | ||
| isAuthenticationFailedError, | ||
| isMCPGatewayShutdownError, | ||
| isModelAvailableInReflectData, | ||
| isModelAvailableInReflectFile, | ||
| resolveCopilotSDKCustomProviderFromReflect, | ||
|
|
@@ -38,6 +41,7 @@ const { | |
| generateCopilotConnectionToken, | ||
| GEMINI_MODEL_NAME_PREFIX, | ||
| isCAPIQuotaExceededError, | ||
| isSDKSessionIdleTimeoutError, | ||
| PROMPT_FILE_INLINE_THRESHOLD_BYTES, | ||
| resolvePromptFileArgs, | ||
| writeCopilotOutputs, | ||
|
|
@@ -81,8 +85,9 @@ describe("copilot_harness.cjs", () => { | |
| expect(isCAPIQuotaExceededError("CAPIError: 400 Bad Request")).toBe(false); | ||
| }); | ||
|
|
||
| it("does not match generic 429 output without the observed quota-exceeded message", () => { | ||
| expect(isCAPIQuotaExceededError("CAPIError: 429 Too Many Requests")).toBe(false); | ||
| it("matches Copilot/CAPI 429 Too Many Requests output", () => { | ||
| expect(isCAPIQuotaExceededError("CAPIError: 429 Too Many Requests")).toBe(true); | ||
| expect(isCAPIQuotaExceededError("Last error: CAPIError: Too Many Requests")).toBe(true); | ||
| }); | ||
|
|
||
| it("does not match unrelated errors", () => { | ||
|
|
@@ -199,6 +204,16 @@ describe("copilot_harness.cjs", () => { | |
| expect(shouldRetry(result, 0)).toBe(false); | ||
| }); | ||
|
|
||
| it("does not retry Copilot/CAPI Too Many Requests output", () => { | ||
| const result = { | ||
| exitCode: 1, | ||
| hasOutput: true, | ||
| output: "Failed to get response from the AI model; retried 5 times. Last error: CAPIError: Too Many Requests", | ||
| }; | ||
|
|
||
| expect(shouldRetry(result, 0)).toBe(false); | ||
| }); | ||
|
|
||
| it("still retries generic partial-execution errors with output", () => { | ||
| const result = { | ||
| exitCode: 1, | ||
|
|
@@ -239,6 +254,79 @@ describe("copilot_harness.cjs", () => { | |
| expect(shouldRetry(result, 1, true, 1)).toBe(false); | ||
| }); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] The new tests confirm that 💡 Suggested regression testsit("retries SDK session.idle timeout as partial execution", () => {
const result = {
exitCode: 1,
hasOutput: true,
output: "[copilot-sdk-driver] Timeout after 60000ms waiting for session.idle",
};
expect(shouldRetry(result, 0)).toBe(true);
});
it("retries MCP gateway shutdown as partial execution", () => {
const result = {
exitCode: 1,
hasOutput: true,
output: '{"message":"Gateway shutdown initiated","serversTerminated":1,"status":"closed"}',
};
expect(shouldRetry(result, 0)).toBe(true);
});These tests serve as executable documentation of the intentional retry-through behaviour described in the PR. |
||
|
|
||
| describe("failure classification helpers", () => { | ||
| it("classifies Copilot SDK session.idle timeouts distinctly", () => { | ||
| const output = "[copilot-sdk-driver] Timeout after 60000ms waiting for session.idle"; | ||
| expect(isSDKSessionIdleTimeoutError(output)).toBe(true); | ||
| expect(classifyCopilotFailure({ hasOutput: true, isSDKSessionIdleTimeout: true })).toBe("sdk_session_idle_timeout"); | ||
| }); | ||
|
|
||
| it("classifies MCP gateway shutdown distinctly when present in output", () => { | ||
| const output = 'Response: {"message":"Gateway shutdown initiated","serversTerminated":2,"status":"closed"}'; | ||
| expect(isMCPGatewayShutdownError(output)).toBe(true); | ||
| expect(classifyCopilotFailure({ hasOutput: true, isMCPGatewayShutdown: true })).toBe("mcp_gateway_shutdown"); | ||
| }); | ||
|
|
||
| it("sdk_session_idle_timeout outranks permission_denied in failure classification", () => { | ||
| // Both flags set — the more specific signal must win. | ||
| expect(classifyCopilotFailure({ hasOutput: true, isSDKSessionIdleTimeout: true, hasNumerousPermissionDenied: true })).toBe("sdk_session_idle_timeout"); | ||
| }); | ||
|
|
||
| it("mcp_gateway_shutdown outranks permission_denied in failure classification", () => { | ||
| // Both flags set — the more specific signal must win. | ||
| expect(classifyCopilotFailure({ hasOutput: true, isMCPGatewayShutdown: true, hasNumerousPermissionDenied: true })).toBe("mcp_gateway_shutdown"); | ||
| }); | ||
|
|
||
| it("retries sdk_session_idle_timeout as partial execution (shouldRetry)", () => { | ||
| // sdk_session_idle_timeout is not a quota/permission blocker; the harness should retry. | ||
| const result = { | ||
| exitCode: 1, | ||
| hasOutput: true, | ||
| output: "[copilot-sdk-driver] Timeout after 60000ms waiting for session.idle", | ||
| }; | ||
| const MAX_RETRIES = 3; | ||
| const shouldRetryLocal = (r, attempt) => { | ||
| if (r.exitCode === 0) return false; | ||
| if (hasNumerousPermissionDeniedIssues(r.output)) return false; | ||
| if (isCAPIQuotaExceededError(r.output)) return false; | ||
| return attempt < MAX_RETRIES && r.hasOutput; | ||
| }; | ||
| expect(shouldRetryLocal(result, 0)).toBe(true); | ||
| }); | ||
|
|
||
| it("retries mcp_gateway_shutdown as partial execution (shouldRetry)", () => { | ||
| // mcp_gateway_shutdown is not a quota/permission blocker; the harness should retry. | ||
| const result = { | ||
| exitCode: 1, | ||
| hasOutput: true, | ||
| output: '{"message":"Gateway shutdown initiated","serversTerminated":1,"status":"closed"}', | ||
| }; | ||
| const MAX_RETRIES = 3; | ||
| const shouldRetryLocal = (r, attempt) => { | ||
| if (r.exitCode === 0) return false; | ||
| if (hasNumerousPermissionDeniedIssues(r.output)) return false; | ||
| if (isCAPIQuotaExceededError(r.output)) return false; | ||
| return attempt < MAX_RETRIES && r.hasOutput; | ||
| }; | ||
| expect(shouldRetryLocal(result, 0)).toBe(true); | ||
| }); | ||
|
|
||
| it("extractOutputTail never exceeds maxChars even when maxChars is 1", () => { | ||
| const tail = extractOutputTail("abc", { maxLines: 5, maxChars: 1 }); | ||
| expect(tail.length).toBeLessThanOrEqual(1); | ||
| }); | ||
|
|
||
| it("extracts a compact tail preview from large output", () => { | ||
| const tail = extractOutputTail(["line 1", "line 2", "line 3", "line 4"].join("\n"), { maxLines: 2, maxChars: 20 }); | ||
| expect(tail).toBe("line 3\nline 4"); | ||
| }); | ||
|
|
||
| it("truncates very large output tails from the front", () => { | ||
| const tail = extractOutputTail(`prefix\n${"x".repeat(40)}`, { maxLines: 5, maxChars: 16 }); | ||
| expect(tail).toBe(`…${"x".repeat(15)}`); | ||
| }); | ||
| }); | ||
|
|
||
| it("does not claim a retry when already at max retry attempt", () => { | ||
| const result = { exitCode: 2, hasOutput: false }; | ||
| expect(shouldRetry(result, MAX_RETRIES, true, 0)).toBe(false); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,10 @@ | |
| * for the selected engine/account (for example unknown model name, model not | ||
| * found, or model unavailable for the plan). | ||
| * - capi_quota_exceeded_error: The Copilot CAPI quota has been exhausted | ||
| * (e.g., "CAPIError: 429 429 quota exceeded"). | ||
| * or rate-limited (e.g., "CAPIError: 429 429 quota exceeded", | ||
| * "CAPIError: Too Many Requests"). All matched forms are treated as | ||
| * non-retryable because the Copilot SDK has already retried internally | ||
| * before surfacing the error. | ||
| * | ||
| * This replaces the individual bash scripts (detect_inference_access_error.sh, | ||
| * detect_mcp_policy_error.sh) with a single JavaScript step. | ||
|
|
@@ -57,10 +60,14 @@ const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/; | |
| const MODEL_NOT_SUPPORTED_PATTERN = | ||
| /(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable))/i; | ||
|
|
||
| // Pattern: Copilot/CAPI quota exhaustion. | ||
| // Matches the observed error: "CAPIError: 429 429 quota exceeded". | ||
| // Quota exhaustion is a persistent, non-retryable condition. | ||
| const CAPI_QUOTA_EXCEEDED_PATTERN = /CAPIError:\s*429\s+429\s+quota exceeded/i; | ||
| // Pattern: Copilot/CAPI quota exhaustion and rate-limit responses. | ||
| // Matches all observed forms: | ||
| // "CAPIError: 429 429 quota exceeded" (original observed form) | ||
| // "CAPIError: 429 Too Many Requests" (HTTP 429 form) | ||
| // "CAPIError: Too Many Requests" (no status code in message) | ||
| // All forms are treated as non-retryable; the Copilot SDK has already retried | ||
| // internally before surfacing this error (evidenced by "retried 5 times" context). | ||
| const CAPI_QUOTA_EXCEEDED_PATTERN = /CAPIError:\s*(?:429\s+)?(?:429\s+quota exceeded|Too Many Requests)/i; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
💡 Details and suggested fixOld (narrow):```js A standard HTTP 429 from a transiently rate-limited backend is now indistinguishable from true per-account quota exhaustion. Since The test string Options:
|
||
|
|
||
| /** | ||
| * Determines if the collected output contains the observed Copilot/CAPI quota exhaustion error. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
filter(Boolean)removes blank lines beforeslice(-maxLines)—maxLinescounts non-empty lines, not total lines, so callers silently get fewer context lines than requested.💡 Details
If the last section of output is:
...after
filter(Boolean)you have 3 non-empty lines. A caller requestingmaxLines: 2sees only the two stack trace lines with no indication that the separating blank lines — and thus the visual structure of the error — were dropped.If blank lines should be preserved, remove
filter(Boolean). If they should be collapsed, document thatmaxLinesapplies after filtering so callers aren't surprised by unexpectedly thin output.