
Commit 6703362

feat: impl Responses API in oai-adapters (#8417)
* feat: add GPT-5 Codex model support
  - Add GPT-5 Codex to llm-info package with 500k context and 150k max tokens
  - Add model definition to models.ts for UI configuration
  - Include GPT-5 Codex in OpenAI provider packages list
  - Model supports chat and edit roles with tool_use capability
* feat: impl Responses API in oai-adapters
* fix chat describer
* Update openai.ts
* fix llm-info
* address dallin's feedback
1 parent 20a8e6c commit 6703362

File tree

14 files changed: +1517 −5 lines changed


core/util/chatDescriber.test.ts

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ describe("ChatDescriber", () => {
    expect(result).toBeUndefined();
  });

-  it("should set completionOptions.maxTokens to 12", async () => {
+  it("should set completionOptions.maxTokens to 16", async () => {
    const message = "Test message";
    const completionOptions: LLMFullCompletionOptions = { temperature: 0.7 };

core/util/chatDescriber.ts

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ import { renderChatMessage } from "./messageContent";
import { convertFromUnifiedHistory } from "./messageConversion";

export class ChatDescriber {
-  static maxTokens = 12;
+  static maxTokens = 16; // Increased from 12 to meet GPT-5 minimum requirement
  static prompt: string | undefined =
    "Given the following... please reply with a title for the chat that is 3-4 words in length, all words used should be directly related to the content of the chat, avoid using verbs unless they are directly related to the content of the chat, no additional text or explanation, you don't need ending punctuation.\n\n";
  static messenger: IMessenger<ToCoreProtocol, FromCoreProtocol>;

extensions/cli/package-lock.json

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default.

extensions/cli/src/stream/streamChatResponse.test.ts

Lines changed: 129 additions & 0 deletions
@@ -252,6 +252,135 @@ describe("processStreamingResponse - content preservation", () => {
    expect(result.finalContent).toBe("Hello world!");
  });

+  it("routes gpt-5 models through responsesStream and preserves streaming tool updates", async () => {
+    const gpt5Chunks: ChatCompletionChunk[] = [
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: { role: "assistant" },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: { content: "Analyzing repository…" },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: {
+              tool_calls: [
+                {
+                  index: 0,
+                  id: "call_final",
+                  type: "function",
+                  function: {
+                    name: "searchDocs",
+                    arguments: '{"query":"unit',
+                  },
+                },
+              ],
+            },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: {
+              tool_calls: [
+                {
+                  index: 0,
+                  type: "function",
+                  function: {
+                    arguments: ' tests"}',
+                  },
+                },
+              ],
+            },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: {},
+            finish_reason: "tool_calls",
+          },
+        ],
+      },
+    ];
+
+    const responsesStream = vi.fn().mockImplementation(async function* () {
+      for (const chunk of gpt5Chunks) {
+        yield chunk;
+      }
+    });
+    const chatCompletionStream = vi.fn().mockImplementation(async function* () {
+      throw new Error("chatCompletionStream should not be used for gpt-5");
+    });
+
+    mockLlmApi = {
+      responsesStream,
+      chatCompletionStream,
+    } as unknown as BaseLlmApi;
+
+    mockModel = {
+      model: "gpt-5-preview",
+      provider: "openai",
+    } as unknown as ModelConfig;
+
+    const result = await processStreamingResponse({
+      chatHistory,
+      model: mockModel,
+      llmApi: mockLlmApi,
+      abortController: mockAbortController,
+    });
+
+    expect(responsesStream).toHaveBeenCalledTimes(1);
+    expect(chatCompletionStream).not.toHaveBeenCalled();
+    expect(result.content).toBe("Analyzing repository…");
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls[0]).toMatchObject({
+      id: "call_final",
+      name: "searchDocs",
+      arguments: { query: "unit tests" },
+    });
+    expect(result.shouldContinue).toBe(true);
+  });
+
  it("handles provider that only sends tool ID in first chunk then uses index", async () => {
    chunks = [
      contentChunk("I'll read the README.md file for you and then say hello!"),
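For reference, the assertions in the new test imply the shape of the value processStreamingResponse resolves with. The sketch below is inferred from those expectations only; the actual type in the CLI codebase is not shown in this commit and may differ.

// Inferred from the test assertions above; hypothetical, may not match the
// real type in extensions/cli.
interface StreamingResponseResult {
  content: string; // concatenated assistant text deltas ("Analyzing repository…")
  toolCalls: Array<{
    id: string; // "call_final"
    name: string; // "searchDocs"
    arguments: Record<string, unknown>; // streamed JSON fragments, parsed: { query: "unit tests" }
  }>;
  shouldContinue: boolean; // true when the turn ends with finish_reason "tool_calls"
}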

extensions/cli/src/util/exponentialBackoff.ts

Lines changed: 17 additions & 1 deletion
@@ -1,4 +1,4 @@
-import { BaseLlmApi } from "@continuedev/openai-adapters";
+import { BaseLlmApi, isResponsesModel } from "@continuedev/openai-adapters";
import type { ChatCompletionCreateParamsStreaming } from "openai/resources.mjs";

import { error, warn } from "../logging.js";
@@ -173,6 +173,14 @@ export async function chatCompletionStreamWithBackoff(
      throw new Error("Request aborted");
    }

+    const useResponses =
+      typeof llmApi.responsesStream === "function" &&
+      isResponsesModel(params.model);
+
+    if (useResponses) {
+      return llmApi.responsesStream!(params, abortSignal);
+    }
+
    return llmApi.chatCompletionStream(params, abortSignal);
  } catch (err: any) {
    lastError = err;
@@ -189,6 +197,14 @@ export async function chatCompletionStreamWithBackoff(

    // Only retry if the error is retryable
    if (!isRetryableError(err)) {
+      // Log full error details for non-retryable errors
+      logger.error("Non-retryable LLM API error", err, {
+        status: err.status,
+        statusText: err.statusText,
+        message: err.message,
+        error: err.error,
+        model: params.model,
+      });
      throw err;
    }
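The isResponsesModel predicate imported here comes from @continuedev/openai-adapters; its body is not part of this commit view. As a rough sketch of the assumption the routing relies on (the new test routes "gpt-5-preview" through responsesStream, which suggests a prefix match rather than an exact-name check), it could look roughly like the following — hypothetical, not the package's actual code.

// Hypothetical sketch only: the real isResponsesModel lives in
// @continuedev/openai-adapters and is not shown in this commit.
export function isResponsesModel(model: string): boolean {
  // "gpt-5", "gpt-5-codex", "gpt-5-preview" would all route to /responses
  return /^gpt-5/.test(model);
}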

gui/src/pages/AddNewModel/configs/models.ts

Lines changed: 13 additions & 0 deletions
@@ -1079,6 +1079,19 @@ export const models: { [key: string]: ModelPackage } = {
    icon: "openai.png",
    isOpenSource: false,
  },
+  gpt5Codex: {
+    title: "GPT-5 Codex",
+    description:
+      "OpenAI's most advanced code generation model, optimized for programming tasks",
+    params: {
+      model: "gpt-5-codex",
+      contextLength: 400_000,
+      title: "GPT-5 Codex",
+    },
+    providerOptions: ["openai"],
+    icon: "openai.png",
+    isOpenSource: false,
+  },
  gpt4turbo: {
    title: "GPT-4 Turbo",
    description:

gui/src/pages/AddNewModel/configs/providers.ts

Lines changed: 1 addition & 0 deletions
@@ -118,6 +118,7 @@ export const providers: Partial<Record<string, ProviderInfo>> = {
    tags: [ModelProviderTags.RequiresApiKey],
    packages: [
      models.gpt5,
+      models.gpt5Codex,
      models.gpt4o,
      models.gpt4omini,
      models.gpt4turbo,

packages/config-yaml/src/index.ts

Lines changed: 1 addition & 0 deletions
@@ -1,2 +1,3 @@
export * from "./browser.js";
export * from "./registryClient.js";
+export { parseAgentFileRules } from "./markdown/agentFiles.js";

packages/llm-info/src/providers/openai.ts

Lines changed: 9 additions & 1 deletion
@@ -79,9 +79,17 @@ export const OpenAi: ModelProvider = {
    {
      model: "gpt-5",
      displayName: "GPT-5",
+      contextLength: 128000,
+      maxCompletionTokens: 16384,
+      regex: /^gpt-5$/,
+      recommendedFor: ["chat"],
+    },
+    {
+      model: "gpt-5-codex",
+      displayName: "GPT-5 Codex",
      contextLength: 400000,
      maxCompletionTokens: 128000,
-      regex: /gpt-5/,
+      regex: /gpt-5-codex/,
      recommendedFor: ["chat"],
    },
    // gpt-4o
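The regex change matters because model info is matched by pattern: with the old /gpt-5/ regex, "gpt-5-codex" would also have matched the base GPT-5 entry. Below is a minimal lookup sketch assuming a find-first-match strategy; the findModelInfo helper is illustrative only and not part of llm-info.

// Illustrative only; llm-info's actual lookup logic is not part of this diff.
interface ModelInfo {
  model: string;
  regex?: RegExp;
  contextLength?: number;
  maxCompletionTokens?: number;
}

function findModelInfo(models: ModelInfo[], name: string): ModelInfo | undefined {
  return models.find((m) => (m.regex ? m.regex.test(name) : m.model === name));
}

// With the anchored patterns above:
//   "gpt-5"       -> /^gpt-5$/      (128000 context, 16384 max completion)
//   "gpt-5-codex" -> /gpt-5-codex/  (400000 context, 128000 max completion)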

packages/openai-adapters/src/apis/OpenAI.ts

Lines changed: 64 additions & 0 deletions
@@ -11,9 +11,20 @@ import {
  CompletionCreateParamsStreaming,
  Model,
} from "openai/resources/index";
+import type {
+  Response,
+  ResponseStreamEvent,
+} from "openai/resources/responses/responses.js";
import { z } from "zod";
import { OpenAIConfigSchema } from "../types.js";
import { customFetch } from "../util.js";
+import {
+  createResponsesStreamState,
+  fromResponsesChunk,
+  isResponsesModel,
+  responseToChatCompletion,
+  toResponsesParams,
+} from "./openaiResponses.js";
import {
  BaseLlmApi,
  CreateRerankResponse,
@@ -63,6 +74,11 @@ export class OpenAIApi implements BaseLlmApi {
    return body;
  }

+  protected shouldUseResponsesEndpoint(model: string): boolean {
+    const isOfficialOpenAIAPI = this.apiBase === "https://api.openai.com/v1/";
+    return isOfficialOpenAIAPI && isResponsesModel(model);
+  }
+
  modifyCompletionBody<
    T extends
      | CompletionCreateParamsNonStreaming
@@ -98,6 +114,10 @@ export class OpenAIApi implements BaseLlmApi {
    body: ChatCompletionCreateParamsNonStreaming,
    signal: AbortSignal,
  ): Promise<ChatCompletion> {
+    if (this.shouldUseResponsesEndpoint(body.model)) {
+      const response = await this.responsesNonStream(body, signal);
+      return responseToChatCompletion(response);
+    }
    const response = await this.openai.chat.completions.create(
      this.modifyChatBody(body),
      {
@@ -111,6 +131,12 @@ export class OpenAIApi implements BaseLlmApi {
    body: ChatCompletionCreateParamsStreaming,
    signal: AbortSignal,
  ): AsyncGenerator<ChatCompletionChunk, any, unknown> {
+    if (this.shouldUseResponsesEndpoint(body.model)) {
+      for await (const chunk of this.responsesStream(body, signal)) {
+        yield chunk;
+      }
+      return;
+    }
    const response = await this.openai.chat.completions.create(
      this.modifyChatBody(body),
      {
@@ -209,4 +235,42 @@ export class OpenAIApi implements BaseLlmApi {
  async list(): Promise<Model[]> {
    return (await this.openai.models.list()).data;
  }
+
+  async responsesNonStream(
+    body: ChatCompletionCreateParamsNonStreaming,
+    signal: AbortSignal,
+  ): Promise<Response> {
+    const params = toResponsesParams({
+      ...(body as ChatCompletionCreateParams),
+      stream: false,
+    });
+    return (await this.openai.responses.create(params, {
+      signal,
+    })) as Response;
+  }
+
+  async *responsesStream(
+    body: ChatCompletionCreateParamsStreaming,
+    signal: AbortSignal,
+  ): AsyncGenerator<ChatCompletionChunk> {
+    const params = toResponsesParams({
+      ...(body as ChatCompletionCreateParams),
+      stream: true,
+    });
+
+    const state = createResponsesStreamState({
+      model: body.model,
+    });
+
+    const stream = this.openai.responses.stream(params as any, {
+      signal,
+    });
+
+    for await (const event of stream as AsyncIterable<ResponseStreamEvent>) {
+      const chunk = fromResponsesChunk(state, event);
+      if (chunk) {
+        yield chunk;
+      }
+    }
+  }
}
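From a caller's perspective nothing changes: chatCompletionStream still yields ChatCompletionChunk objects, and the adapter decides internally whether to hit /chat/completions or /responses. Below is a minimal consumer-side sketch; how OpenAIApi is constructed and whether it is re-exported from the package root are assumptions not shown in this diff.

// Sketch of consumer-side streaming; assumes an already-constructed OpenAIApi.
// The import path and setup here are assumptions for illustration.
import type { OpenAIApi } from "@continuedev/openai-adapters";

async function streamAnswer(api: OpenAIApi, signal: AbortSignal): Promise<void> {
  const body = {
    model: "gpt-5-codex",
    messages: [{ role: "user" as const, content: "Summarize this repo" }],
    stream: true as const,
  };

  // For gpt-5* models against https://api.openai.com/v1/, this delegates to
  // responsesStream; every other model still goes through chat.completions.
  for await (const chunk of api.chatCompletionStream(body, signal)) {
    const delta = chunk.choices[0]?.delta;
    if (delta?.content) {
      process.stdout.write(delta.content);
    }
  }
}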
