diff --git a/containers/api-proxy/providers/copilot.js b/containers/api-proxy/providers/copilot.js index 0dfef2b1b..1b03bd2c0 100644 --- a/containers/api-proxy/providers/copilot.js +++ b/containers/api-proxy/providers/copilot.js @@ -196,14 +196,29 @@ function getCopilotModelFallbackPolicy(modelFallback, env = process.env) { || (env.COPILOT_PROVIDER_API_KEY || '').trim() || (env.COPILOT_API_KEY || '').trim() ); + + // Standard Copilot (no BYOK hints): suppress fallback because Copilot is + // authoritative for its own model catalogue. Rewriting a retired/restricted + // model to a middle-power fallback obscures the real error. if (!hasByokHints) { - return { effective: modelFallback, suppressed: false }; + return { + effective: { ...modelFallback, enabled: false }, + suppressed: true, + suppression_reason: 'copilot_standard_authoritative', + }; } + // BYOK pointing at a GitHub Copilot catalog target — still suppress because + // the catalog is authoritative. if (isGithubCopilotCatalogTarget(env.COPILOT_API_TARGET)) { - return { effective: modelFallback, suppressed: false }; + return { + effective: { ...modelFallback, enabled: false }, + suppressed: true, + suppression_reason: 'copilot_catalog_target_authoritative', + }; } + // BYOK pointing at a non-GitHub target (Azure, custom OpenAI, etc.) return { effective: { ...modelFallback, enabled: false }, suppressed: true, diff --git a/containers/api-proxy/server.js b/containers/api-proxy/server.js index f91be8c10..fcbd705fa 100644 --- a/containers/api-proxy/server.js +++ b/containers/api-proxy/server.js @@ -90,20 +90,33 @@ if (!HTTPS_PROXY) { // and rewritten to a concrete model name before forwarding to upstream. const MODEL_ALIASES_RAW = (process.env.AWF_MODEL_ALIASES || '').trim() || undefined; const MODEL_ALIASES = parseModelAliases(MODEL_ALIASES_RAW); -const DEFAULT_MODEL_FALLBACK = Object.freeze({ enabled: true, strategy: 'middle_power' }); +const DEFAULT_MODEL_FALLBACK = Object.freeze({ enabled: true, strategy: 'middle_power', excludeEngines: Object.freeze([]) }); + +function parseExcludeEngines(value) { + if (!Array.isArray(value)) return []; + return [...new Set( + value + .filter(engine => typeof engine === 'string') + .map(engine => engine.trim().toLowerCase()) + .filter(Boolean), + )]; +} function parseModelFallbackConfig(rawConfig) { - if (!rawConfig) return { ...DEFAULT_MODEL_FALLBACK }; + if (!rawConfig) return { ...DEFAULT_MODEL_FALLBACK, excludeEngines: [] }; try { const parsed = JSON.parse(rawConfig); - if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return { ...DEFAULT_MODEL_FALLBACK }; + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + return { ...DEFAULT_MODEL_FALLBACK, excludeEngines: [] }; + } const enabled = parsed.enabled === undefined ? true : Boolean(parsed.enabled); const strategy = typeof parsed.strategy === 'string' && parsed.strategy.trim() ? parsed.strategy.trim() : DEFAULT_MODEL_FALLBACK.strategy; - return { enabled, strategy }; + const excludeEngines = parseExcludeEngines(parsed.excludeEngines); + return { enabled, strategy, excludeEngines }; } catch { - return { ...DEFAULT_MODEL_FALLBACK }; + return { ...DEFAULT_MODEL_FALLBACK, excludeEngines: [] }; } } @@ -126,6 +139,14 @@ logRequest('info', 'startup', { }); function getModelFallbackPolicyForProvider(provider) { + // Check excludeEngines first — applies to all providers + if (MODEL_FALLBACK.excludeEngines && MODEL_FALLBACK.excludeEngines.includes(provider.toLowerCase())) { + return { + effective: { ...MODEL_FALLBACK, enabled: false }, + suppressed: true, + suppression_reason: 'excluded_by_config', + }; + } if (provider !== 'copilot') { return { effective: MODEL_FALLBACK, suppressed: false }; } diff --git a/containers/api-proxy/server.models.test.js b/containers/api-proxy/server.models.test.js index e6e3906f4..282621f02 100644 --- a/containers/api-proxy/server.models.test.js +++ b/containers/api-proxy/server.models.test.js @@ -242,9 +242,9 @@ describe('makeModelBodyTransform', () => { stdoutSpy.mockClear(); isolatedServer.resetModelCacheState(); - isolatedServer.cachedModels.copilot = ['gpt-5.2', 'gpt-4.1', 'gpt-3.5-turbo']; + isolatedServer.cachedModels.openai = ['gpt-5.2', 'gpt-4.1', 'gpt-3.5-turbo']; - const transform = isolatedServer.makeModelBodyTransform('copilot'); + const transform = isolatedServer.makeModelBodyTransform('openai'); const transformed = await transform(Buffer.from(JSON.stringify({ model: 'sonnet', messages: [] }))); expect(transformed).toBeInstanceOf(Buffer); @@ -267,7 +267,7 @@ describe('makeModelBodyTransform', () => { it('emits model_fallback_skipped log when normal resolution succeeds', async () => { const prevAliases = process.env.AWF_MODEL_ALIASES; const prevFallback = process.env.AWF_MODEL_FALLBACK; - process.env.AWF_MODEL_ALIASES = JSON.stringify({ models: { sonnet: ['copilot/*sonnet*'] } }); + process.env.AWF_MODEL_ALIASES = JSON.stringify({ models: { sonnet: ['openai/*sonnet*'] } }); process.env.AWF_MODEL_FALLBACK = JSON.stringify({ enabled: true, strategy: 'middle_power' }); const stdoutSpy = jest.spyOn(process.stdout, 'write').mockImplementation(() => true); @@ -280,9 +280,9 @@ describe('makeModelBodyTransform', () => { stdoutSpy.mockClear(); isolatedServer.resetModelCacheState(); - isolatedServer.cachedModels.copilot = ['claude-sonnet-4.6', 'claude-haiku-4.5']; + isolatedServer.cachedModels.openai = ['claude-sonnet-4.6', 'claude-haiku-4.5']; - const transform = isolatedServer.makeModelBodyTransform('copilot'); + const transform = isolatedServer.makeModelBodyTransform('openai'); const transformed = await transform(Buffer.from(JSON.stringify({ model: 'sonnet', messages: [] }))); expect(transformed).toBeInstanceOf(Buffer); diff --git a/containers/api-proxy/server.network.test.js b/containers/api-proxy/server.network.test.js index bf59bac55..62192f00f 100644 --- a/containers/api-proxy/server.network.test.js +++ b/containers/api-proxy/server.network.test.js @@ -397,12 +397,13 @@ describe('reflectEndpoints', () => { expect(result.model_fallback).toEqual({ enabled: true, strategy: 'middle_power', + excludeEngines: [], }); expect(result.model_fallback_effective).toEqual({ - openai: { enabled: true, strategy: 'middle_power', suppressed: false }, - anthropic: { enabled: true, strategy: 'middle_power', suppressed: false }, - copilot: { enabled: true, strategy: 'middle_power', suppressed: false }, - gemini: { enabled: true, strategy: 'middle_power', suppressed: false }, + openai: { enabled: true, strategy: 'middle_power', excludeEngines: [], suppressed: false }, + anthropic: { enabled: true, strategy: 'middle_power', excludeEngines: [], suppressed: false }, + copilot: { enabled: false, strategy: 'middle_power', excludeEngines: [], suppressed: true, suppression_reason: 'copilot_standard_authoritative' }, + gemini: { enabled: true, strategy: 'middle_power', excludeEngines: [], suppressed: false }, }); }); @@ -424,10 +425,11 @@ describe('reflectEndpoints', () => { }); const reflect = isolatedServer.reflectEndpoints(); - expect(reflect.model_fallback).toEqual({ enabled: true, strategy: 'middle_power' }); + expect(reflect.model_fallback).toEqual({ enabled: true, strategy: 'middle_power', excludeEngines: [] }); expect(reflect.model_fallback_effective.copilot).toEqual({ enabled: false, strategy: 'middle_power', + excludeEngines: [], suppressed: true, suppression_reason: 'copilot_byok_non_githubcopilot_target', }); @@ -443,6 +445,81 @@ describe('reflectEndpoints', () => { } }); + it('should suppress fallback for standard Copilot (no BYOK hints)', () => { + const hintVars = [ + 'COPILOT_PROVIDER_TYPE', + 'COPILOT_PROVIDER_BASE_URL', + 'COPILOT_PROVIDER_API_KEY', + 'COPILOT_API_KEY', + 'COPILOT_API_TARGET', + ]; + const prevValues = Object.fromEntries(hintVars.map(name => [name, process.env[name]])); + for (const name of hintVars) delete process.env[name]; + + try { + let isolatedServer; + jest.isolateModules(() => { + isolatedServer = require('./server'); + }); + + const result = isolatedServer.reflectEndpoints(); + expect(result.model_fallback_effective.copilot).toEqual({ + enabled: false, + strategy: 'middle_power', + excludeEngines: [], + suppressed: true, + suppression_reason: 'copilot_standard_authoritative', + }); + } finally { + for (const [name, value] of Object.entries(prevValues)) { + if (value === undefined) delete process.env[name]; + else process.env[name] = value; + } + } + }); + + it('should suppress fallback for engines in excludeEngines config', () => { + const prevFallback = process.env.AWF_MODEL_FALLBACK; + process.env.AWF_MODEL_FALLBACK = JSON.stringify({ + enabled: true, + strategy: 'middle_power', + excludeEngines: [' OpenAI ', 'anthropic', 'OPENAI', ' '], + }); + + try { + let isolatedServer; + jest.isolateModules(() => { + isolatedServer = require('./server'); + }); + + const reflect = isolatedServer.reflectEndpoints(); + expect(reflect.model_fallback_effective.openai).toEqual({ + enabled: false, + strategy: 'middle_power', + excludeEngines: ['openai', 'anthropic'], + suppressed: true, + suppression_reason: 'excluded_by_config', + }); + expect(reflect.model_fallback_effective.anthropic).toEqual({ + enabled: false, + strategy: 'middle_power', + excludeEngines: ['openai', 'anthropic'], + suppressed: true, + suppression_reason: 'excluded_by_config', + }); + // Gemini not in excludeEngines — should NOT be suppressed + expect(reflect.model_fallback_effective.gemini).toEqual({ + enabled: true, + strategy: 'middle_power', + excludeEngines: ['openai', 'anthropic'], + suppressed: false, + }); + } finally { + if (prevFallback === undefined) delete process.env.AWF_MODEL_FALLBACK; + else process.env.AWF_MODEL_FALLBACK = prevFallback; + } + }); + it('should report models_fetch_complete true after fetch completes', async () => { await fetchStartupModels([]); const result = reflectEndpoints(); diff --git a/containers/api-proxy/upstream-response.js b/containers/api-proxy/upstream-response.js index 2d38a196a..85c2e4e44 100644 --- a/containers/api-proxy/upstream-response.js +++ b/containers/api-proxy/upstream-response.js @@ -57,8 +57,17 @@ function createUpstreamResponseHandlers({ logRequest('info', 'request_complete', logFields); } - function logUpstreamAuthError(statusCode, { requestId, provider, targetHost, req }) { - if (statusCode === 400 || statusCode === 401 || statusCode === 403) { + function logUpstreamAuthError(statusCode, { requestId, provider, targetHost, req, responseBody }) { + if (statusCode === 401 || statusCode === 403) { + logRequest('warn', 'upstream_auth_error', { + request_id: requestId, provider, status: statusCode, + upstream_host: targetHost, path: sanitizeForLog(req.url), + message: `Upstream returned ${statusCode} — check that the API key is valid and correctly formatted`, + }); + } else if (statusCode === 400) { + // Suppress generic auth-error message when the 400 is a model-not-supported + // error — that case is handled by the model_unavailable diagnostic. + if (responseBody && parseModelNotSupportedFromBody(responseBody)) return; logRequest('warn', 'upstream_auth_error', { request_id: requestId, provider, status: statusCode, upstream_host: targetHost, path: sanitizeForLog(req.url), @@ -142,8 +151,22 @@ function createUpstreamResponseHandlers({ return; } + // ── (c) Model-unavailable diagnostic (retries exhausted or non-retryable) ─── + if (proxyRes.statusCode === 400 && parseModelNotSupportedFromBody(responseBody)) { + logRequest('error', 'model_unavailable', { + request_id: requestId, + provider, + status: proxyRes.statusCode, + path: sanitizeForLog(req.url), + retries_attempted: modelNotSupportedRetryCount, + message: `Model is unavailable or retired — the requested model is not supported by ${provider}. ` + + 'Check that the model name is correct and not deprecated. ' + + 'If using model aliases, verify the alias resolves to an available model.', + }); + } + logRequestCompletion(proxyRes.statusCode, responseBytes, initiatorSent, billingInfo, completionCtx); - logUpstreamAuthError(proxyRes.statusCode, authErrCtx); + logUpstreamAuthError(proxyRes.statusCode, { ...authErrCtx, responseBody }); const resHeaders = { ...proxyRes.headers, diff --git a/docs/awf-config-spec.md b/docs/awf-config-spec.md index ab0010a99..dac0dfa1f 100644 --- a/docs/awf-config-spec.md +++ b/docs/awf-config-spec.md @@ -769,6 +769,7 @@ Model fallback is controlled via `apiProxy.modelFallback`: |-------|------|---------|-------------| | `enabled` | boolean | `true` | Enable/disable the fallback mechanism | | `strategy` | string | `middle_power` | Selection strategy (`middle_power` is currently the only strategy) | +| `excludeEngines` | string[] | `[]` | Engines for which middle-power fallback is suppressed (e.g. `["openai"]`). Excluded engines receive native model-unavailable errors instead of silent rewrites. | ### 12.2 Middle-Power Strategy @@ -812,6 +813,14 @@ The fallback is **NOT** activated when: - A family version fallback is available (for `gpt-5.*` only) - The fallback is disabled (`enabled: false`) - An alias has `fallback: false` (see §12.4) +- The provider is in the `excludeEngines` list +- Copilot engine in standard mode (no BYOK env vars): the Copilot CLI is + authoritative for its own model catalogue, so retired/restricted model names + should fail fast with a clear upstream error rather than being silently + rewritten to a middle-power fallback +- Copilot BYOK that still targets a GitHub Copilot catalog host (for example + `api.githubcopilot.com`): the catalog remains authoritative, so fallback is + still suppressed - Copilot is configured for a BYOK non-`githubcopilot` target (for example Azure OpenAI deployment endpoints), where deployment names are provider-local and must not be rewritten to catalog model IDs diff --git a/docs/awf-config.schema.json b/docs/awf-config.schema.json index 430038110..89610ac71 100644 --- a/docs/awf-config.schema.json +++ b/docs/awf-config.schema.json @@ -100,6 +100,13 @@ "middle_power" ], "description": "Fallback selection strategy. Currently only 'middle_power' is supported." + }, + "excludeEngines": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of engine/provider names for which middle-power fallback is suppressed. Use this to let specific providers handle model-unavailable errors natively instead of rewriting to a fallback model." } } }, diff --git a/src/awf-config-schema.json b/src/awf-config-schema.json index 430038110..89610ac71 100644 --- a/src/awf-config-schema.json +++ b/src/awf-config-schema.json @@ -100,6 +100,13 @@ "middle_power" ], "description": "Fallback selection strategy. Currently only 'middle_power' is supported." + }, + "excludeEngines": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of engine/provider names for which middle-power fallback is suppressed. Use this to let specific providers handle model-unavailable errors natively instead of rewriting to a fallback model." } } },