143 changes: 143 additions & 0 deletions extensions/cli/src/stream/streamChatResponse.test.ts
@@ -4,6 +4,20 @@ import type { ChatHistoryItem } from "core/index.js";
import type { ChatCompletionChunk } from "openai/resources/chat/completions.mjs";
import { vi } from "vitest";

vi.mock("fdir", () => ({
fdir: class {
withBasePath() {
return this;
}
filter() {
return this;
}
crawl() {
return new Set<string>();
}
},
}));

import { toolPermissionManager } from "../permissions/permissionManager.js";
import { ToolCall } from "../tools/index.js";
import { readFileTool } from "../tools/readFile.js";
@@ -252,6 +266,135 @@ describe("processStreamingResponse - content preservation", () => {
expect(result.finalContent).toBe("Hello world!");
});

it("routes gpt-5 models through responsesStream and preserves streaming tool updates", async () => {
const gpt5Chunks: ChatCompletionChunk[] = [
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: { role: "assistant" },
finish_reason: null,
},
],
},
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: { content: "Analyzing repository…" },
finish_reason: null,
},
],
},
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: {
tool_calls: [
{
index: 0,
id: "call_final",
type: "function",
function: {
name: "searchDocs",
arguments: '{"query":"unit',
},
},
],
},
finish_reason: null,
},
],
},
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: {
tool_calls: [
{
index: 0,
type: "function",
function: {
arguments: ' tests"}',
},
},
],
},
finish_reason: null,
},
],
},
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: {},
finish_reason: "tool_calls",
},
],
},
];

const responsesStream = vi.fn().mockImplementation(async function* () {
for (const chunk of gpt5Chunks) {
yield chunk;
}
});
const chatCompletionStream = vi.fn().mockImplementation(async function* () {
throw new Error("chatCompletionStream should not be used for gpt-5");
});

mockLlmApi = {
responsesStream,
chatCompletionStream,
} as unknown as BaseLlmApi;

mockModel = {
model: "gpt-5-preview",
provider: "openai",
} as unknown as ModelConfig;

const result = await processStreamingResponse({
chatHistory,
model: mockModel,
llmApi: mockLlmApi,
abortController: mockAbortController,
});

expect(responsesStream).toHaveBeenCalledTimes(1);
expect(chatCompletionStream).not.toHaveBeenCalled();
expect(result.content).toBe("Analyzing repository…");
expect(result.toolCalls).toHaveLength(1);
expect(result.toolCalls[0]).toMatchObject({
id: "call_final",
name: "searchDocs",
arguments: { query: "unit tests" },
});
expect(result.shouldContinue).toBe(true);
});

it("handles provider that only sends tool ID in first chunk then uses index", async () => {
chunks = [
contentChunk("I'll read the README.md file for you and then say hello!"),
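The gpt-5 test above feeds one tool call across two delta chunks and expects a single assembled call. A minimal sketch of that accumulation pattern, with illustrative names (`accumulateToolCalls` and `AccumulatedToolCall` are not part of the codebase):

```ts
import type { ChatCompletionChunk } from "openai/resources/chat/completions.mjs";

interface AccumulatedToolCall {
  id?: string;
  name?: string;
  argumentsText: string;
}

// Merge streamed tool_call deltas by index, concatenating argument fragments.
function accumulateToolCalls(chunks: ChatCompletionChunk[]): AccumulatedToolCall[] {
  const calls: AccumulatedToolCall[] = [];
  for (const chunk of chunks) {
    for (const delta of chunk.choices[0]?.delta?.tool_calls ?? []) {
      const call = (calls[delta.index] ??= { argumentsText: "" });
      if (delta.id) call.id = delta.id;
      if (delta.function?.name) call.name = delta.function.name;
      if (delta.function?.arguments) call.argumentsText += delta.function.arguments;
    }
  }
  return calls;
}

// For the gpt5Chunks fixture this yields a single call:
// { id: "call_final", name: "searchDocs", argumentsText: '{"query":"unit tests"}' }
// JSON.parse of argumentsText gives { query: "unit tests" }, matching the assertion.
```

Concatenating before parsing is the point of the two-chunk fixture: neither argument fragment is valid JSON on its own.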
18 changes: 17 additions & 1 deletion extensions/cli/src/util/exponentialBackoff.ts
@@ -1,4 +1,4 @@
import { BaseLlmApi } from "@continuedev/openai-adapters";
import { BaseLlmApi, isResponsesModel } from "@continuedev/openai-adapters";
import type { ChatCompletionCreateParamsStreaming } from "openai/resources.mjs";

import { error, warn } from "../logging.js";
@@ -173,6 +173,14 @@ export async function chatCompletionStreamWithBackoff(
throw new Error("Request aborted");
}

const useResponses =
typeof llmApi.responsesStream === "function" &&
isResponsesModel(params.model);

if (useResponses) {
return llmApi.responsesStream!(params, abortSignal);
}

return llmApi.chatCompletionStream(params, abortSignal);
} catch (err: any) {
lastError = err;
@@ -189,6 +197,14 @@ export async function chatCompletionStreamWithBackoff(

// Only retry if the error is retryable
if (!isRetryableError(err)) {
// Log full error details for non-retryable errors
logger.error("Non-retryable LLM API error", err, {
status: err.status,
statusText: err.statusText,
message: err.message,
error: err.error,
model: params.model,
});
throw err;
}

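The new guard only routes to the Responses endpoint when the adapter exposes `responsesStream` and `isResponsesModel(params.model)` is true. The real predicate lives in `@continuedev/openai-adapters` and is not shown in this diff; as an assumption for illustration, a stand-in consistent with the gpt-5 test fixtures might look like:

```ts
// Assumption: a stand-in for isResponsesModel that matches only gpt-5-family
// names, as the fixtures above ("gpt-5", "gpt-5-preview") suggest. The real
// predicate exported by @continuedev/openai-adapters may cover more models.
function isResponsesModelStandIn(model: string): boolean {
  return /^gpt-5/i.test(model);
}

console.log(isResponsesModelStandIn("gpt-5-preview")); // true  -> responsesStream
console.log(isResponsesModelStandIn("gpt-4o")); // false -> chatCompletionStream
```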
1 change: 1 addition & 0 deletions packages/config-yaml/src/index.ts
@@ -1,2 +1,3 @@
export * from "./browser.js";
export * from "./registryClient.js";
export { parseAgentFileRules } from "./markdown/agentFiles.js";
64 changes: 64 additions & 0 deletions packages/openai-adapters/src/apis/OpenAI.ts
@@ -11,9 +11,20 @@ import {
CompletionCreateParamsStreaming,
Model,
} from "openai/resources/index";
import type {
Response,
ResponseStreamEvent,
} from "openai/resources/responses/responses.js";
import { z } from "zod";
import { OpenAIConfigSchema } from "../types.js";
import { customFetch } from "../util.js";
import {
createResponsesStreamState,
fromResponsesChunk,
isResponsesModel,
responseToChatCompletion,
toResponsesParams,
} from "./openaiResponses.js";
import {
BaseLlmApi,
CreateRerankResponse,
@@ -63,6 +74,11 @@ export class OpenAIApi implements BaseLlmApi {
return body;
}

protected shouldUseResponsesEndpoint(model: string): boolean {
const isOfficialOpenAIAPI = this.apiBase === "https://api.openai.com/v1/";
return isOfficialOpenAIAPI && isResponsesModel(model);
}

modifyCompletionBody<
T extends
| CompletionCreateParamsNonStreaming
@@ -98,6 +114,10 @@
body: ChatCompletionCreateParamsNonStreaming,
signal: AbortSignal,
): Promise<ChatCompletion> {
if (this.shouldUseResponsesEndpoint(body.model)) {
const response = await this.responsesNonStream(body, signal);
return responseToChatCompletion(response);
}
const response = await this.openai.chat.completions.create(
this.modifyChatBody(body),
{
@@ -111,6 +131,12 @@
body: ChatCompletionCreateParamsStreaming,
signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk, any, unknown> {
if (this.shouldUseResponsesEndpoint(body.model)) {
for await (const chunk of this.responsesStream(body, signal)) {
yield chunk;
}
return;
}
const response = await this.openai.chat.completions.create(
this.modifyChatBody(body),
{
@@ -209,4 +235,42 @@
async list(): Promise<Model[]> {
return (await this.openai.models.list()).data;
}

async responsesNonStream(
body: ChatCompletionCreateParamsNonStreaming,
signal: AbortSignal,
): Promise<Response> {
const params = toResponsesParams({
...(body as ChatCompletionCreateParams),
stream: false,
});
return (await this.openai.responses.create(params, {
signal,
})) as Response;
}

async *responsesStream(
body: ChatCompletionCreateParamsStreaming,
signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk> {
const params = toResponsesParams({
...(body as ChatCompletionCreateParams),
stream: true,
});

const state = createResponsesStreamState({
model: body.model,
});

const stream = this.openai.responses.stream(params as any, {
signal,
});

for await (const event of stream as AsyncIterable<ResponseStreamEvent>) {
const chunk = fromResponsesChunk(state, event);
if (chunk) {
yield chunk;
}
}
}
}
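A minimal consumption sketch of the new `responsesStream` method, using the signature added above. Constructing the `OpenAIApi` instance (from an `OpenAIConfigSchema` config) is outside this diff, so the `api` parameter is left abstract here:

```ts
// Consume responsesStream exactly like chatCompletionStream: each yielded
// value is a ChatCompletionChunk translated from a Responses API event.
async function printResponse(api: OpenAIApi): Promise<void> {
  const controller = new AbortController();
  for await (const chunk of api.responsesStream(
    {
      model: "gpt-5",
      stream: true,
      messages: [{ role: "user", content: "Summarize the latest changes" }],
    },
    controller.signal,
  )) {
    process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
  }
}
```

Callers that keep using `chatCompletionStream` get the same behavior implicitly, since it now forwards to `responsesStream` whenever `shouldUseResponsesEndpoint` matches.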
11 changes: 11 additions & 0 deletions packages/openai-adapters/src/apis/base.ts
@@ -10,6 +10,7 @@ import {
EmbeddingCreateParams,
Model,
} from "openai/resources/index";
import type { Response } from "openai/resources/responses/responses.js";

export interface FimCreateParamsStreaming
extends CompletionCreateParamsStreaming {
@@ -50,6 +51,16 @@ export interface BaseLlmApi {
signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk>;

responsesNonStream?(
body: ChatCompletionCreateParamsNonStreaming,
signal: AbortSignal,
): Promise<Response>;

responsesStream?(
body: ChatCompletionCreateParamsStreaming,
signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk>;

// Completion, no stream
completionNonStream(
body: CompletionCreateParamsNonStreaming,
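Because `responsesNonStream` and `responsesStream` are optional on `BaseLlmApi`, callers feature-detect before using them, as the exponentialBackoff change does. A sketch of that pattern (the helper name is illustrative, not part of the codebase):

```ts
// Prefer the Responses endpoint when the adapter implements it; otherwise
// fall back to the standard chat completions stream.
async function* streamWithBestEndpoint(
  api: BaseLlmApi,
  body: ChatCompletionCreateParamsStreaming,
  signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk> {
  if (typeof api.responsesStream === "function") {
    yield* api.responsesStream(body, signal);
  } else {
    yield* api.chatCompletionStream(body, signal);
  }
}
```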