Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion containers/api-proxy/proxy-request.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,20 @@ const limiter = rateLimiter.create();
/** When false, token-budget warnings are never injected into request bodies. */
const isSteeringEnabled = () => process.env.AWF_ENABLE_TOKEN_STEERING === 'true';

/**
* Backoff delays (ms) between successive model-not-supported retries.
* Index 0 → delay before the 1st retry, index 1 → delay before the 2nd retry.
*/
const MODEL_NOT_SUPPORTED_RETRY_DELAYS_MS = [1000, 2000];

/** Resolves after `ms` milliseconds (overridable in tests via module-level setter). */
let _sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));

/** @internal Test-only: replace the sleep implementation so retries are instant. */
function _setSleepForTests(fn) { _sleep = fn; }
/** @internal Test-only: restore the real sleep implementation. */
function _resetSleepForTests() { _sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms)); }

function getUrlPathForSpan(requestUrl) {
if (typeof requestUrl !== 'string' || !requestUrl) return '/';
try {
Expand Down Expand Up @@ -246,11 +260,13 @@ const { handleUpstreamResponse } = createUpstreamResponseHandlers({
* @param {object} requestHeaders - Headers for the upstream request
* @param {{ body: Buffer, targetHost: string, upstreamPath: string, req: object,
* res: object, provider: string, requestId: string, startTime: number,
* span: object, requestBytes: number, hasRetried?: boolean }} ctx
* span: object, requestBytes: number, hasRetried?: boolean,
* modelNotSupportedRetryCount?: number }} ctx
*/
function sendUpstreamRequest(requestHeaders, {
body, targetHost, upstreamPath, req, res, provider, requestId, startTime, span, requestBytes,
hasRetried = false,
modelNotSupportedRetryCount = 0,
}) {
const options = {
hostname: targetHost, port: 443, path: upstreamPath,
Expand All @@ -262,10 +278,22 @@ function sendUpstreamRequest(requestHeaders, {
handleUpstreamResponse(proxyRes, requestHeaders, {
body, res, provider, requestId, req, targetHost, startTime, span, requestBytes,
hasRetried,
modelNotSupportedRetryCount,
onRetry: (retryHeaders) => sendUpstreamRequest(retryHeaders, {
body, targetHost, upstreamPath, req, res, provider, requestId, startTime, span, requestBytes,
hasRetried: true,
modelNotSupportedRetryCount,
}),
onModelNotSupportedRetry: () => {
const delayMs = MODEL_NOT_SUPPORTED_RETRY_DELAYS_MS[modelNotSupportedRetryCount] ?? 2000;
_sleep(delayMs).then(() => {
sendUpstreamRequest(requestHeaders, {
body, targetHost, upstreamPath, req, res, provider, requestId, startTime, span, requestBytes,
hasRetried,
modelNotSupportedRetryCount: modelNotSupportedRetryCount + 1,
});
});
},
});
});

Expand Down Expand Up @@ -500,4 +528,6 @@ module.exports = {
getAndClearPendingSteeringMessage,
getAndClearPendingTimeoutSteeringMessage,
injectSteeringMessage,
_setSleepForTests,
_resetSleepForTests,
};
302 changes: 302 additions & 0 deletions containers/api-proxy/server.model-not-supported.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
/**
* Tests for the transient Copilot "400 model not supported" retry logic.
*
* When the Copilot catalogue API returns a model set that does not include the
* requested model, the proxy retries the request up to MAX_MODEL_NOT_SUPPORTED_RETRIES
* times with a configurable backoff delay before surfacing the error to the caller.
*/

const https = require('https');
const { EventEmitter } = require('events');

const originalHttpsProxy = process.env.HTTPS_PROXY;
let proxyRequest;
let _setSleepForTests;
let _resetSleepForTests;

beforeAll(() => {
delete process.env.HTTPS_PROXY;
jest.resetModules();
({ proxyRequest } = require('./server'));
({ _setSleepForTests, _resetSleepForTests } = require('./proxy-request'));
// Make retries instant — no real setTimeout delays in unit tests.
_setSleepForTests(() => Promise.resolve());
});

afterAll(() => {
_resetSleepForTests();
if (originalHttpsProxy === undefined) {
delete process.env.HTTPS_PROXY;
} else {
process.env.HTTPS_PROXY = originalHttpsProxy;
}
jest.resetModules();
});

// ── helpers ───────────────────────────────────────────────────────────────────

function makeReq(headers = {}) {
const req = new EventEmitter();
req.url = '/v1/chat/completions';
req.method = 'POST';
req.headers = { 'content-type': 'application/json', ...headers };
return req;
}

function makeRes() {
const res = {
headersSent: false,
setHeader: jest.fn(),
writeHead: jest.fn(() => { res.headersSent = true; }),
end: jest.fn(),
destroy: jest.fn(),
};
return res;
}

function makeProxyReq() {
const proxyReq = new EventEmitter();
proxyReq.end = jest.fn();
proxyReq.write = jest.fn();
proxyReq.destroy = jest.fn();
return proxyReq;
}

function makeProxyRes(statusCode, headers = { 'content-type': 'application/json' }) {
const proxyRes = new EventEmitter();
proxyRes.statusCode = statusCode;
proxyRes.headers = headers;
proxyRes.pipe = jest.fn();
return proxyRes;
}

/** Flush all pending microtasks/promises so async retry callbacks can run. */
function flushPromises() {
return new Promise(resolve => setImmediate(resolve));
}

function getStructuredLogs(writeSpy, eventName) {
return writeSpy.mock.calls
.map(([line]) => { try { return JSON.parse(line); } catch { return null; } })
.filter(entry => entry && entry.event === eventName);
}

// ── tests ─────────────────────────────────────────────────────────────────────

describe('proxyRequest copilot model-not-supported retry', () => {
afterEach(() => {
jest.restoreAllMocks();
});

it('retries once after Copilot returns 400 model not supported, then succeeds', async () => {
const stdoutWriteSpy = jest.spyOn(process.stdout, 'write').mockImplementation(() => true);
const responseHandlers = [];
const capturedOptions = [];

jest.spyOn(https, 'request').mockImplementation((options, cb) => {
capturedOptions.push(options);
responseHandlers.push(cb);
return makeProxyReq();
});

const req = makeReq();
const res = makeRes();
proxyRequest(req, res, 'api.githubcopilot.com', { Authorization: '******' }, 'copilot');
req.emit('end');

expect(capturedOptions).toHaveLength(1);

// First response: 400 model not supported
const firstResponse = makeProxyRes(400);
responseHandlers[0](firstResponse);
firstResponse.emit('data', Buffer.from(
'{"message":"The requested model is not supported"}'
));
firstResponse.emit('end');

await flushPromises();

// Retry should have been dispatched
expect(capturedOptions).toHaveLength(2);

// Second response: 200 success
const secondResponse = makeProxyRes(200);
responseHandlers[1](secondResponse);
expect(res.writeHead).toHaveBeenCalledWith(200, expect.objectContaining({
'x-request-id': expect.any(String),
}));

const retryLogs = getStructuredLogs(stdoutWriteSpy, 'model_not_supported_retry');
expect(retryLogs).toHaveLength(1);
expect(retryLogs[0]).toMatchObject({
provider: 'copilot',
retry_attempt: 1,
max_retries: 2,
});
});

it('retries a second time when the first retry also returns 400 model not supported', async () => {
jest.spyOn(process.stdout, 'write').mockImplementation(() => true);
const responseHandlers = [];
const capturedOptions = [];

jest.spyOn(https, 'request').mockImplementation((options, cb) => {
capturedOptions.push(options);
responseHandlers.push(cb);
return makeProxyReq();
});

const req = makeReq();
const res = makeRes();
proxyRequest(req, res, 'api.githubcopilot.com', { Authorization: '******' }, 'copilot');
req.emit('end');

// First attempt: 400 model not supported → retry 1
const resp1 = makeProxyRes(400);
responseHandlers[0](resp1);
resp1.emit('data', Buffer.from('{"message":"The requested model is not supported"}'));
resp1.emit('end');
await flushPromises();

expect(capturedOptions).toHaveLength(2);

// Retry 1: 400 model not supported → retry 2
const resp2 = makeProxyRes(400);
responseHandlers[1](resp2);
resp2.emit('data', Buffer.from('{"message":"The requested model is not supported"}'));
resp2.emit('end');
await flushPromises();

expect(capturedOptions).toHaveLength(3);

// Retry 2: 200 success
const resp3 = makeProxyRes(200);
responseHandlers[2](resp3);
expect(res.writeHead).toHaveBeenCalledWith(200, expect.any(Object));
});

it('surfaces the 400 to the client after exhausting all retries', async () => {
jest.spyOn(process.stdout, 'write').mockImplementation(() => true);
const responseHandlers = [];
const capturedOptions = [];

jest.spyOn(https, 'request').mockImplementation((options, cb) => {
capturedOptions.push(options);
responseHandlers.push(cb);
return makeProxyReq();
});

const req = makeReq();
const res = makeRes();
proxyRequest(req, res, 'api.githubcopilot.com', { Authorization: '******' }, 'copilot');
req.emit('end');

const errorBody = '{"message":"The requested model is not supported"}';

// All 3 attempts return 400 model not supported
for (let attempt = 0; attempt < 3; attempt++) {
const resp = makeProxyRes(400);
responseHandlers[attempt](resp);
resp.emit('data', Buffer.from(errorBody));
resp.emit('end');
await flushPromises();
}

// 3 total attempts (original + 2 retries), no 4th
expect(capturedOptions).toHaveLength(3);
expect(res.writeHead).toHaveBeenCalledWith(400, expect.objectContaining({
'x-request-id': expect.any(String),
}));
expect(res.end).toHaveBeenCalledWith(Buffer.from(errorBody));
});

it('does not retry a 400 that is not model-not-supported', async () => {
jest.spyOn(process.stdout, 'write').mockImplementation(() => true);
const responseHandlers = [];
const capturedOptions = [];

jest.spyOn(https, 'request').mockImplementation((options, cb) => {
capturedOptions.push(options);
responseHandlers.push(cb);
return makeProxyReq();
});

const req = makeReq();
const res = makeRes();
proxyRequest(req, res, 'api.githubcopilot.com', { Authorization: '******' }, 'copilot');
req.emit('end');

const resp = makeProxyRes(400);
responseHandlers[0](resp);
resp.emit('data', Buffer.from('{"message":"max_tokens exceeded"}'));
resp.emit('end');
await flushPromises();

// No retry for unrelated 400
expect(capturedOptions).toHaveLength(1);
expect(res.writeHead).toHaveBeenCalledWith(400, expect.any(Object));
});

it('does not retry model-not-supported for non-copilot providers', async () => {
jest.spyOn(process.stdout, 'write').mockImplementation(() => true);
const responseHandlers = [];
const capturedOptions = [];

jest.spyOn(https, 'request').mockImplementation((options, cb) => {
capturedOptions.push(options);
responseHandlers.push(cb);
return makeProxyReq();
});

const req = makeReq();
const res = makeRes();
// Use openai provider — model-not-supported retry only applies to copilot
proxyRequest(req, res, 'api.openai.com', { Authorization: '******' }, 'openai');
req.emit('end');

const resp = makeProxyRes(400);
responseHandlers[0](resp);
resp.emit('data', Buffer.from('{"message":"The requested model is not supported"}'));
resp.emit('end');
await flushPromises();

expect(capturedOptions).toHaveLength(1);
expect(res.writeHead).toHaveBeenCalledWith(400, expect.any(Object));
});

it('sends an identical request body on retry', async () => {
jest.spyOn(process.stdout, 'write').mockImplementation(() => true);
const responseHandlers = [];
const capturedOptions = [];
const capturedBodies = [];

jest.spyOn(https, 'request').mockImplementation((options, cb) => {
capturedOptions.push(options);
responseHandlers.push(cb);
const proxyReq = makeProxyReq();
proxyReq.write = jest.fn(chunk => capturedBodies.push(chunk));
return proxyReq;
});

const req = makeReq();
const requestPayload = '{"model":"claude-opus-4.6","messages":[{"role":"user","content":"hi"}]}';
const res = makeRes();
proxyRequest(req, res, 'api.githubcopilot.com', { Authorization: '******' }, 'copilot');
req.emit('data', Buffer.from(requestPayload));
req.emit('end');

const resp1 = makeProxyRes(400);
responseHandlers[0](resp1);
resp1.emit('data', Buffer.from('{"message":"The requested model is not supported"}'));
resp1.emit('end');
await flushPromises();

expect(capturedOptions).toHaveLength(2);
// Both attempts should carry the same body
expect(capturedBodies[0].toString()).toBe(capturedBodies[1].toString());

const resp2 = makeProxyRes(200);
responseHandlers[1](resp2);
expect(res.writeHead).toHaveBeenCalledWith(200, expect.any(Object));
});
});
Loading
Loading