From 43699d65fc35796ac8805a1d5f9da21e1ebbb050 Mon Sep 17 00:00:00 2001
From: Curtis Galione
Date: Thu, 5 Feb 2026 14:02:16 -0800
Subject: [PATCH 1/2] Add gpt-5.1 models to Responses API routing

Previously, only gpt-5-pro and gpt-5-codex were routed to OpenAI's
Responses API. This caused gpt-5.1 models (including gpt-5.1-codex and
gpt-5.1-codex-mini) to incorrectly use the Chat Completions API,
resulting in errors.

This change adds the gpt-5.1 model prefix to the routing logic so that
all gpt-5.1 models (gpt-5.1, gpt-5.1-codex, gpt-5.1-codex-mini, etc.)
are properly routed to the Responses API via
fetchOpenAIResponsesTranslate.

Fixes an issue where customers using invoke() or online scorers with
gpt-5.1-codex models were getting errors due to incorrect endpoint
routing.

Related models affected:
- gpt-5.1
- gpt-5.1-2025-11-13
- gpt-5.1-chat-latest
- gpt-5.1-codex
- gpt-5.1-codex-mini
---
 packages/proxy/src/proxy.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/proxy/src/proxy.ts b/packages/proxy/src/proxy.ts
index ffba88a7..c09637dd 100644
--- a/packages/proxy/src/proxy.ts
+++ b/packages/proxy/src/proxy.ts
@@ -2089,7 +2089,8 @@ async function fetchOpenAI(
     bodyData?.model?.startsWith("o1-pro") ||
     bodyData?.model?.startsWith("o3-pro") ||
     bodyData?.model?.startsWith("gpt-5-pro") ||
-    bodyData?.model?.startsWith("gpt-5-codex")
+    bodyData?.model?.startsWith("gpt-5-codex") ||
+    bodyData?.model?.startsWith("gpt-5.1")
   ) {
     return fetchOpenAIResponsesTranslate({
       headers,

From c9397441991e58b9a02159995af529b4ceab484c Mon Sep 17 00:00:00 2001
From: Curtis Galione
Date: Thu, 5 Feb 2026 14:17:10 -0800
Subject: [PATCH 2/2] Route gpt-5.x-codex models to Responses API

Updated the routing logic to target all gpt-5 codex variants using a
flexible pattern that matches gpt-5-codex, gpt-5.1-codex,
gpt-5.2-codex, gpt-5.3-codex, and any future gpt-5.x-codex models.

Changes:
- Added flexible pattern: starts with 'gpt-5' AND contains '-codex'
  (proxy.ts:2092-2093)
- Updated test to validate gpt-5.x-codex routing (openai.test.ts)

Models affected:
- gpt-5-codex (existing)
- gpt-5.1-codex
- gpt-5.1-codex-mini
- gpt-5.2-codex (future)
- gpt-5.3-codex (future)
- Any future gpt-5.x-codex variants

Other gpt-5 models (gpt-5-mini, gpt-5.1, gpt-5.1-chat-latest, etc.) use
the Chat Completions API; this narrows the broad gpt-5.1 prefix match
added in the previous patch, which had also routed non-codex gpt-5.1
models to the Responses API.

Fixes a customer issue where gpt-5.x-codex models failed with invoke()
and online scorers due to incorrect endpoint routing.
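
As an illustration, the routing predicate after this patch behaves like
the following standalone sketch (routesToResponsesApi is a hypothetical
helper named for this example; in the real code the check is inlined in
fetchOpenAI in proxy.ts):

    // Hypothetical standalone version of the inlined routing check.
    function routesToResponsesApi(model: string): boolean {
      return (
        model.startsWith("o1-pro") ||
        model.startsWith("o3-pro") ||
        model.startsWith("gpt-5-pro") ||
        // Matches gpt-5-codex, gpt-5.1-codex, gpt-5.1-codex-mini, and
        // any future gpt-5.x-codex variant.
        (model.startsWith("gpt-5") && model.includes("-codex"))
      );
    }

    routesToResponsesApi("gpt-5.1-codex");      // true  -> Responses API
    routesToResponsesApi("gpt-5.1-codex-mini"); // true  -> Responses API
    routesToResponsesApi("gpt-5.1");            // false -> Chat Completions
    routesToResponsesApi("gpt-5-mini");         // false -> Chat Completions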
---
 packages/proxy/src/providers/openai.test.ts | 49 ++++++++++++---------
 packages/proxy/src/proxy.ts                 |  4 +-
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/packages/proxy/src/providers/openai.test.ts b/packages/proxy/src/providers/openai.test.ts
index f9b8983a..2aaf00e9 100644
--- a/packages/proxy/src/providers/openai.test.ts
+++ b/packages/proxy/src/providers/openai.test.ts
@@ -431,11 +431,11 @@ describe("request/response checking", () => {
     ]);
   });

-  it("should convert minimal to low reasoning_effort for gpt-5.1 models", async () => {
+  it("should convert minimal to low reasoning_effort for gpt-5.x-codex models", async () => {
    const calls: InterceptedCall[] = [];
    server.use(
      http.post(
-        "https://api.openai.com/v1/chat/completions",
+        "https://api.openai.com/v1/responses",
        async ({ request: req }) => {
          const request: InterceptedRequest = {
            method: req.method,
@@ -443,29 +443,35 @@
            body: await req.json(),
          };

-          // Mock a successful response
+          // Mock a successful responses API response
          const response: InterceptedResponse = {
            status: 200,
            body: {
-              id: "chatcmpl-test",
-              object: "chat.completion",
-              created: 1234567890,
-              model: "gpt-5.1",
-              choices: [
+              id: "resp-test",
+              object: "response",
+              created_at: 1234567890,
+              model: "gpt-5.1-codex",
+              output: [
                {
-                  index: 0,
-                  message: {
-                    role: "assistant",
-                    content: "Test response",
-                    refusal: null,
-                  },
-                  finish_reason: "stop",
+                  type: "message",
+                  content: [
+                    {
+                      type: "output_text",
+                      text: "Test response",
+                    },
+                  ],
                },
              ],
              usage: {
-                prompt_tokens: 10,
-                completion_tokens: 5,
+                input_tokens: 10,
+                output_tokens: 5,
                total_tokens: 15,
+                input_tokens_details: {
+                  cached_tokens: 0,
+                },
+                output_tokens_details: {
+                  reasoning_tokens: 0,
+                },
              },
            },
          };
@@ -481,7 +487,7 @@

    await callProxyV1({
      body: {
-        model: "gpt-5.1",
+        model: "gpt-5.1-codex",
        reasoning_effort: "minimal",
        stream: false,
        messages: [
@@ -497,9 +503,12 @@
    });

    expect(calls.length).toBe(1);
+    // gpt-5.x-codex models are routed to Responses API, which uses reasoning.effort instead of reasoning_effort
    expect(calls[0].request.body).toMatchObject({
-      model: "gpt-5.1",
-      reasoning_effort: "low", // minimal should be converted to low for gpt-5.1
+      model: "gpt-5.1-codex",
+      reasoning: {
+        effort: "low", // minimal should be converted to low for gpt-5.x-codex
+      },
    });
  });
});
diff --git a/packages/proxy/src/proxy.ts b/packages/proxy/src/proxy.ts
index c09637dd..12a644e4 100644
--- a/packages/proxy/src/proxy.ts
+++ b/packages/proxy/src/proxy.ts
@@ -2089,8 +2089,8 @@ async function fetchOpenAI(
     bodyData?.model?.startsWith("o1-pro") ||
     bodyData?.model?.startsWith("o3-pro") ||
     bodyData?.model?.startsWith("gpt-5-pro") ||
-    bodyData?.model?.startsWith("gpt-5-codex") ||
-    bodyData?.model?.startsWith("gpt-5.1")
+    (bodyData?.model?.startsWith("gpt-5") &&
+      bodyData?.model?.includes("-codex"))
   ) {
     return fetchOpenAIResponsesTranslate({
       headers,
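
Note: the updated test pins the translated request shape rather than the
translation itself. A minimal sketch of the reasoning-effort mapping the
test expects for gpt-5.x-codex models (toResponsesReasoning is a
hypothetical helper for illustration; the real logic lives inside
fetchOpenAIResponsesTranslate):

    // Chat Completions' top-level reasoning_effort becomes a nested
    // reasoning.effort in the Responses API body, and the test asserts
    // that "minimal" is converted to "low" for gpt-5.x-codex models.
    type ChatEffort = "minimal" | "low" | "medium" | "high";

    function toResponsesReasoning(
      effort: ChatEffort,
    ): { effort: Exclude<ChatEffort, "minimal"> } {
      return { effort: effort === "minimal" ? "low" : effort };
    }

    toResponsesReasoning("minimal"); // { effort: "low" }
    toResponsesReasoning("high"); // { effort: "high" }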