Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions containers/api-proxy/providers/copilot.js
Original file line number Diff line number Diff line change
Expand Up @@ -196,14 +196,29 @@ function getCopilotModelFallbackPolicy(modelFallback, env = process.env) {
|| (env.COPILOT_PROVIDER_API_KEY || '').trim()
|| (env.COPILOT_API_KEY || '').trim()
);

// Standard Copilot (no BYOK hints): suppress fallback because Copilot is
// authoritative for its own model catalogue. Rewriting a retired/restricted
// model to a middle-power fallback obscures the real error.
if (!hasByokHints) {
return { effective: modelFallback, suppressed: false };
return {
effective: { ...modelFallback, enabled: false },
suppressed: true,
suppression_reason: 'copilot_standard_authoritative',
};
}

// BYOK pointing at a GitHub Copilot catalog target — still suppress because
// the catalog is authoritative.
if (isGithubCopilotCatalogTarget(env.COPILOT_API_TARGET)) {
return { effective: modelFallback, suppressed: false };
return {
effective: { ...modelFallback, enabled: false },
suppressed: true,
suppression_reason: 'copilot_catalog_target_authoritative',
};
}

// BYOK pointing at a non-GitHub target (Azure, custom OpenAI, etc.)
return {
effective: { ...modelFallback, enabled: false },
suppressed: true,
Expand Down
31 changes: 26 additions & 5 deletions containers/api-proxy/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,20 +90,33 @@ if (!HTTPS_PROXY) {
// and rewritten to a concrete model name before forwarding to upstream.
const MODEL_ALIASES_RAW = (process.env.AWF_MODEL_ALIASES || '').trim() || undefined;
const MODEL_ALIASES = parseModelAliases(MODEL_ALIASES_RAW);
const DEFAULT_MODEL_FALLBACK = Object.freeze({ enabled: true, strategy: 'middle_power' });
const DEFAULT_MODEL_FALLBACK = Object.freeze({ enabled: true, strategy: 'middle_power', excludeEngines: Object.freeze([]) });

/**
 * Normalize the `excludeEngines` value from the model-fallback config.
 *
 * Non-array input yields an empty list. Within an array, only string entries
 * are kept; each is trimmed and lower-cased, entries that are empty after
 * trimming are dropped, and duplicates are removed while preserving the order
 * of first appearance.
 *
 * @param {*} value - Raw `excludeEngines` value taken from parsed config.
 * @returns {string[]} De-duplicated, normalized engine names.
 */
function parseExcludeEngines(value) {
  if (!Array.isArray(value)) {
    return [];
  }

  // A Set keeps insertion order, so the first occurrence of a name wins.
  const normalizedNames = new Set();
  for (const entry of value) {
    if (typeof entry !== 'string') {
      continue;
    }
    const name = entry.trim().toLowerCase();
    if (name) {
      normalizedNames.add(name);
    }
  }
  return Array.from(normalizedNames);
}

function parseModelFallbackConfig(rawConfig) {
if (!rawConfig) return { ...DEFAULT_MODEL_FALLBACK };
if (!rawConfig) return { ...DEFAULT_MODEL_FALLBACK, excludeEngines: [] };
try {
const parsed = JSON.parse(rawConfig);
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return { ...DEFAULT_MODEL_FALLBACK };
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
return { ...DEFAULT_MODEL_FALLBACK, excludeEngines: [] };
}
const enabled = parsed.enabled === undefined ? true : Boolean(parsed.enabled);
const strategy = typeof parsed.strategy === 'string' && parsed.strategy.trim()
? parsed.strategy.trim()
: DEFAULT_MODEL_FALLBACK.strategy;
return { enabled, strategy };
const excludeEngines = parseExcludeEngines(parsed.excludeEngines);
return { enabled, strategy, excludeEngines };
} catch {
return { ...DEFAULT_MODEL_FALLBACK };
return { ...DEFAULT_MODEL_FALLBACK, excludeEngines: [] };
}
}

Expand All @@ -126,6 +139,14 @@ logRequest('info', 'startup', {
});

function getModelFallbackPolicyForProvider(provider) {
// Check excludeEngines first — applies to all providers
if (MODEL_FALLBACK.excludeEngines && MODEL_FALLBACK.excludeEngines.includes(provider.toLowerCase())) {
return {
effective: { ...MODEL_FALLBACK, enabled: false },
suppressed: true,
suppression_reason: 'excluded_by_config',
};
}
if (provider !== 'copilot') {
return { effective: MODEL_FALLBACK, suppressed: false };
}
Expand Down
10 changes: 5 additions & 5 deletions containers/api-proxy/server.models.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,9 @@ describe('makeModelBodyTransform', () => {

stdoutSpy.mockClear();
isolatedServer.resetModelCacheState();
isolatedServer.cachedModels.copilot = ['gpt-5.2', 'gpt-4.1', 'gpt-3.5-turbo'];
isolatedServer.cachedModels.openai = ['gpt-5.2', 'gpt-4.1', 'gpt-3.5-turbo'];

const transform = isolatedServer.makeModelBodyTransform('copilot');
const transform = isolatedServer.makeModelBodyTransform('openai');
const transformed = await transform(Buffer.from(JSON.stringify({ model: 'sonnet', messages: [] })));
expect(transformed).toBeInstanceOf(Buffer);

Expand All @@ -267,7 +267,7 @@ describe('makeModelBodyTransform', () => {
it('emits model_fallback_skipped log when normal resolution succeeds', async () => {
const prevAliases = process.env.AWF_MODEL_ALIASES;
const prevFallback = process.env.AWF_MODEL_FALLBACK;
process.env.AWF_MODEL_ALIASES = JSON.stringify({ models: { sonnet: ['copilot/*sonnet*'] } });
process.env.AWF_MODEL_ALIASES = JSON.stringify({ models: { sonnet: ['openai/*sonnet*'] } });
process.env.AWF_MODEL_FALLBACK = JSON.stringify({ enabled: true, strategy: 'middle_power' });

const stdoutSpy = jest.spyOn(process.stdout, 'write').mockImplementation(() => true);
Expand All @@ -280,9 +280,9 @@ describe('makeModelBodyTransform', () => {

stdoutSpy.mockClear();
isolatedServer.resetModelCacheState();
isolatedServer.cachedModels.copilot = ['claude-sonnet-4.6', 'claude-haiku-4.5'];
isolatedServer.cachedModels.openai = ['claude-sonnet-4.6', 'claude-haiku-4.5'];

const transform = isolatedServer.makeModelBodyTransform('copilot');
const transform = isolatedServer.makeModelBodyTransform('openai');
const transformed = await transform(Buffer.from(JSON.stringify({ model: 'sonnet', messages: [] })));
expect(transformed).toBeInstanceOf(Buffer);

Expand Down
87 changes: 82 additions & 5 deletions containers/api-proxy/server.network.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -397,12 +397,13 @@ describe('reflectEndpoints', () => {
expect(result.model_fallback).toEqual({
enabled: true,
strategy: 'middle_power',
excludeEngines: [],
});
expect(result.model_fallback_effective).toEqual({
openai: { enabled: true, strategy: 'middle_power', suppressed: false },
anthropic: { enabled: true, strategy: 'middle_power', suppressed: false },
copilot: { enabled: true, strategy: 'middle_power', suppressed: false },
gemini: { enabled: true, strategy: 'middle_power', suppressed: false },
openai: { enabled: true, strategy: 'middle_power', excludeEngines: [], suppressed: false },
anthropic: { enabled: true, strategy: 'middle_power', excludeEngines: [], suppressed: false },
copilot: { enabled: false, strategy: 'middle_power', excludeEngines: [], suppressed: true, suppression_reason: 'copilot_standard_authoritative' },
gemini: { enabled: true, strategy: 'middle_power', excludeEngines: [], suppressed: false },
});
});

Expand All @@ -424,10 +425,11 @@ describe('reflectEndpoints', () => {
});

const reflect = isolatedServer.reflectEndpoints();
expect(reflect.model_fallback).toEqual({ enabled: true, strategy: 'middle_power' });
expect(reflect.model_fallback).toEqual({ enabled: true, strategy: 'middle_power', excludeEngines: [] });
expect(reflect.model_fallback_effective.copilot).toEqual({
enabled: false,
strategy: 'middle_power',
excludeEngines: [],
suppressed: true,
suppression_reason: 'copilot_byok_non_githubcopilot_target',
});
Expand All @@ -443,6 +445,81 @@ describe('reflectEndpoints', () => {
}
});

it('should suppress fallback for standard Copilot (no BYOK hints)', () => {
const hintVars = [
'COPILOT_PROVIDER_TYPE',
'COPILOT_PROVIDER_BASE_URL',
'COPILOT_PROVIDER_API_KEY',
'COPILOT_API_KEY',
'COPILOT_API_TARGET',
];
const prevValues = Object.fromEntries(hintVars.map(name => [name, process.env[name]]));
for (const name of hintVars) delete process.env[name];

try {
let isolatedServer;
jest.isolateModules(() => {
isolatedServer = require('./server');
});

const result = isolatedServer.reflectEndpoints();
expect(result.model_fallback_effective.copilot).toEqual({
enabled: false,
strategy: 'middle_power',
excludeEngines: [],
suppressed: true,
suppression_reason: 'copilot_standard_authoritative',
});
} finally {
for (const [name, value] of Object.entries(prevValues)) {
if (value === undefined) delete process.env[name];
else process.env[name] = value;
}
}
});

it('should suppress fallback for engines in excludeEngines config', () => {
const prevFallback = process.env.AWF_MODEL_FALLBACK;
process.env.AWF_MODEL_FALLBACK = JSON.stringify({
enabled: true,
strategy: 'middle_power',
excludeEngines: [' OpenAI ', 'anthropic', 'OPENAI', ' '],
});

try {
let isolatedServer;
jest.isolateModules(() => {
isolatedServer = require('./server');
});

const reflect = isolatedServer.reflectEndpoints();
expect(reflect.model_fallback_effective.openai).toEqual({
enabled: false,
strategy: 'middle_power',
excludeEngines: ['openai', 'anthropic'],
suppressed: true,
suppression_reason: 'excluded_by_config',
});
expect(reflect.model_fallback_effective.anthropic).toEqual({
enabled: false,
strategy: 'middle_power',
excludeEngines: ['openai', 'anthropic'],
suppressed: true,
suppression_reason: 'excluded_by_config',
});
// Gemini not in excludeEngines — should NOT be suppressed
expect(reflect.model_fallback_effective.gemini).toEqual({
enabled: true,
strategy: 'middle_power',
excludeEngines: ['openai', 'anthropic'],
suppressed: false,
});
} finally {
if (prevFallback === undefined) delete process.env.AWF_MODEL_FALLBACK;
else process.env.AWF_MODEL_FALLBACK = prevFallback;
}
});

it('should report models_fetch_complete true after fetch completes', async () => {
await fetchStartupModels([]);
const result = reflectEndpoints();
Expand Down
29 changes: 26 additions & 3 deletions containers/api-proxy/upstream-response.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,17 @@ function createUpstreamResponseHandlers({
logRequest('info', 'request_complete', logFields);
}

function logUpstreamAuthError(statusCode, { requestId, provider, targetHost, req }) {
if (statusCode === 400 || statusCode === 401 || statusCode === 403) {
function logUpstreamAuthError(statusCode, { requestId, provider, targetHost, req, responseBody }) {
if (statusCode === 401 || statusCode === 403) {
logRequest('warn', 'upstream_auth_error', {
request_id: requestId, provider, status: statusCode,
upstream_host: targetHost, path: sanitizeForLog(req.url),
message: `Upstream returned ${statusCode} — check that the API key is valid and correctly formatted`,
});
} else if (statusCode === 400) {
// Suppress generic auth-error message when the 400 is a model-not-supported
// error — that case is handled by the model_unavailable diagnostic.
if (responseBody && parseModelNotSupportedFromBody(responseBody)) return;
logRequest('warn', 'upstream_auth_error', {
request_id: requestId, provider, status: statusCode,
upstream_host: targetHost, path: sanitizeForLog(req.url),
Expand Down Expand Up @@ -142,8 +151,22 @@ function createUpstreamResponseHandlers({
return;
}

// ── (c) Model-unavailable diagnostic (retries exhausted or non-retryable) ───
if (proxyRes.statusCode === 400 && parseModelNotSupportedFromBody(responseBody)) {
logRequest('error', 'model_unavailable', {
request_id: requestId,
provider,
status: proxyRes.statusCode,
path: sanitizeForLog(req.url),
retries_attempted: modelNotSupportedRetryCount,
message: `Model is unavailable or retired — the requested model is not supported by ${provider}. ` +
'Check that the model name is correct and not deprecated. ' +
'If using model aliases, verify the alias resolves to an available model.',
});
}

logRequestCompletion(proxyRes.statusCode, responseBytes, initiatorSent, billingInfo, completionCtx);
logUpstreamAuthError(proxyRes.statusCode, authErrCtx);
logUpstreamAuthError(proxyRes.statusCode, { ...authErrCtx, responseBody });

const resHeaders = {
...proxyRes.headers,
Expand Down
9 changes: 9 additions & 0 deletions docs/awf-config-spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,7 @@ Model fallback is controlled via `apiProxy.modelFallback`:
|-------|------|---------|-------------|
| `enabled` | boolean | `true` | Enable/disable the fallback mechanism |
| `strategy` | string | `middle_power` | Selection strategy (`middle_power` is currently the only strategy) |
| `excludeEngines` | string[] | `[]` | Engines for which middle-power fallback is suppressed (e.g. `["openai"]`). Excluded engines receive native model-unavailable errors instead of silent rewrites. |

### 12.2 Middle-Power Strategy

Expand Down Expand Up @@ -812,6 +813,14 @@ The fallback is **NOT** activated when:
- A family version fallback is available (for `gpt-5.*` only)
- The fallback is disabled (`enabled: false`)
- An alias has `fallback: false` (see §12.4)
- The provider is in the `excludeEngines` list
- The Copilot engine runs in standard mode (no BYOK env vars): the Copilot CLI is
authoritative for its own model catalogue, so retired/restricted model names
should fail fast with a clear upstream error rather than being silently
rewritten to a middle-power fallback
- Copilot BYOK still targets a GitHub Copilot catalog host (for example
`api.githubcopilot.com`): the catalog remains authoritative, so fallback is
still suppressed
- Copilot is configured for a BYOK non-`githubcopilot` target (for example Azure
OpenAI deployment endpoints), where deployment names are provider-local and
must not be rewritten to catalog model IDs
Comment on lines +816 to 826
Expand Down
7 changes: 7 additions & 0 deletions docs/awf-config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@
"middle_power"
],
"description": "Fallback selection strategy. Currently only 'middle_power' is supported."
},
"excludeEngines": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of engine/provider names for which middle-power fallback is suppressed. Use this to let specific providers handle model-unavailable errors natively instead of rewriting to a fallback model."
}
}
},
Expand Down
7 changes: 7 additions & 0 deletions src/awf-config-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@
"middle_power"
],
"description": "Fallback selection strategy. Currently only 'middle_power' is supported."
},
"excludeEngines": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of engine/provider names for which middle-power fallback is suppressed. Use this to let specific providers handle model-unavailable errors natively instead of rewriting to a fallback model."
}
}
},
Expand Down
Loading