143 changes: 143 additions & 0 deletions extensions/cli/src/stream/streamChatResponse.test.ts
@@ -4,6 +4,20 @@ import type { ChatHistoryItem } from "core/index.js";
import type { ChatCompletionChunk } from "openai/resources/chat/completions.mjs";
import { vi } from "vitest";

vi.mock("fdir", () => ({
fdir: class {
withBasePath() {
return this;
}
filter() {
return this;
}
crawl() {
return new Set<string>();
}
},
}));

import { toolPermissionManager } from "../permissions/permissionManager.js";
import { ToolCall } from "../tools/index.js";
import { readFileTool } from "../tools/readFile.js";
@@ -252,6 +266,135 @@ describe("processStreamingResponse - content preservation", () => {
expect(result.finalContent).toBe("Hello world!");
});

it("routes gpt-5 models through responsesStream and preserves streaming tool updates", async () => {
const gpt5Chunks: ChatCompletionChunk[] = [
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: { role: "assistant" },
finish_reason: null,
},
],
},
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: { content: "Analyzing repository…" },
finish_reason: null,
},
],
},
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: {
tool_calls: [
{
index: 0,
id: "call_final",
type: "function",
function: {
name: "searchDocs",
arguments: '{"query":"unit',
},
},
],
},
finish_reason: null,
},
],
},
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: {
tool_calls: [
{
index: 0,
type: "function",
function: {
arguments: ' tests"}',
},
},
],
},
finish_reason: null,
},
],
},
{
id: "resp_gpt5",
object: "chat.completion.chunk",
created: Date.now(),
model: "gpt-5",
choices: [
{
index: 0,
delta: {},
finish_reason: "tool_calls",
},
],
},
];

const responsesStream = vi.fn().mockImplementation(async function* () {
for (const chunk of gpt5Chunks) {
yield chunk;
}
});
const chatCompletionStream = vi.fn().mockImplementation(async function* () {
throw new Error("chatCompletionStream should not be used for gpt-5");
});

mockLlmApi = {
responsesStream,
chatCompletionStream,
} as unknown as BaseLlmApi;

mockModel = {
model: "gpt-5-preview",
provider: "openai",
} as unknown as ModelConfig;

const result = await processStreamingResponse({
chatHistory,
model: mockModel,
llmApi: mockLlmApi,
abortController: mockAbortController,
});

expect(responsesStream).toHaveBeenCalledTimes(1);
expect(chatCompletionStream).not.toHaveBeenCalled();
expect(result.content).toBe("Analyzing repository…");
expect(result.toolCalls).toHaveLength(1);
expect(result.toolCalls[0]).toMatchObject({
id: "call_final",
name: "searchDocs",
arguments: { query: "unit tests" },
});
expect(result.shouldContinue).toBe(true);
});

it("handles provider that only sends tool ID in first chunk then uses index", async () => {
chunks = [
contentChunk("I'll read the README.md file for you and then say hello!"),
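The gpt-5 test above feeds one tool call across two delta chunks and expects a single assembled call. A minimal sketch of that accumulation pattern, with illustrative names (`accumulateToolCalls` and `AccumulatedToolCall` are not part of the codebase):

```ts
import type { ChatCompletionChunk } from "openai/resources/chat/completions.mjs";

interface AccumulatedToolCall {
  id?: string;
  name?: string;
  argumentsText: string;
}

// Merge streamed tool_call deltas by index, concatenating argument fragments.
function accumulateToolCalls(chunks: ChatCompletionChunk[]): AccumulatedToolCall[] {
  const calls: AccumulatedToolCall[] = [];
  for (const chunk of chunks) {
    for (const delta of chunk.choices[0]?.delta?.tool_calls ?? []) {
      const call = (calls[delta.index] ??= { argumentsText: "" });
      if (delta.id) call.id = delta.id;
      if (delta.function?.name) call.name = delta.function.name;
      if (delta.function?.arguments) call.argumentsText += delta.function.arguments;
    }
  }
  return calls;
}

// For the gpt5Chunks fixture this yields a single call:
// { id: "call_final", name: "searchDocs", argumentsText: '{"query":"unit tests"}' }
// JSON.parse of argumentsText gives { query: "unit tests" }, matching the assertion.
```

Concatenating before parsing is the point of the two-chunk fixture: neither argument fragment is valid JSON on its own.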
18 changes: 17 additions & 1 deletion extensions/cli/src/util/exponentialBackoff.ts
@@ -1,4 +1,4 @@
import { BaseLlmApi } from "@continuedev/openai-adapters";
import { BaseLlmApi, isResponsesModel } from "@continuedev/openai-adapters";
import type { ChatCompletionCreateParamsStreaming } from "openai/resources.mjs";

import { error, warn } from "../logging.js";
@@ -173,6 +173,14 @@ export async function chatCompletionStreamWithBackoff(
throw new Error("Request aborted");
}

const useResponses =
typeof llmApi.responsesStream === "function" &&
isResponsesModel(params.model);

if (useResponses) {
return llmApi.responsesStream!(params, abortSignal);
}

return llmApi.chatCompletionStream(params, abortSignal);
} catch (err: any) {
lastError = err;
@@ -189,6 +197,14 @@ export async function chatCompletionStreamWithBackoff(

// Only retry if the error is retryable
if (!isRetryableError(err)) {
// Log full error details for non-retryable errors
logger.error("Non-retryable LLM API error", err, {
status: err.status,
statusText: err.statusText,
message: err.message,
error: err.error,
model: params.model,
});
throw err;
}

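The new guard only routes to the Responses endpoint when the adapter exposes `responsesStream` and `isResponsesModel(params.model)` is true. The real predicate lives in `@continuedev/openai-adapters` and is not shown in this diff; as an assumption for illustration, a stand-in consistent with the gpt-5 test fixtures might look like:

```ts
// Assumption: a stand-in for isResponsesModel that matches only gpt-5-family
// names, as the fixtures above ("gpt-5", "gpt-5-preview") suggest. The real
// predicate exported by @continuedev/openai-adapters may cover more models.
function isResponsesModelStandIn(model: string): boolean {
  return /^gpt-5/i.test(model);
}

console.log(isResponsesModelStandIn("gpt-5-preview")); // true  -> responsesStream
console.log(isResponsesModelStandIn("gpt-4o")); // false -> chatCompletionStream
```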
1 change: 1 addition & 0 deletions packages/config-yaml/src/index.ts
@@ -1,2 +1,3 @@
export * from "./browser.js";
export * from "./registryClient.js";
export { parseAgentFileRules } from "./markdown/agentFiles.js";
64 changes: 64 additions & 0 deletions packages/openai-adapters/src/apis/OpenAI.ts
@@ -11,9 +11,20 @@ import {
CompletionCreateParamsStreaming,
Model,
} from "openai/resources/index";
import type {
Response,
ResponseStreamEvent,
} from "openai/resources/responses/responses.js";
import { z } from "zod";
import { OpenAIConfigSchema } from "../types.js";
import { customFetch } from "../util.js";
import {
createResponsesStreamState,
fromResponsesChunk,
isResponsesModel,
responseToChatCompletion,
toResponsesParams,
} from "./openaiResponses.js";
import {
BaseLlmApi,
CreateRerankResponse,
@@ -63,6 +74,11 @@ export class OpenAIApi implements BaseLlmApi {
return body;
}

protected shouldUseResponsesEndpoint(model: string): boolean {
const isOfficialOpenAIAPI = this.apiBase === "https://api.openai.com/v1/";
return isOfficialOpenAIAPI && isResponsesModel(model);
}

modifyCompletionBody<
T extends
| CompletionCreateParamsNonStreaming
@@ -98,6 +114,10 @@
body: ChatCompletionCreateParamsNonStreaming,
signal: AbortSignal,
): Promise<ChatCompletion> {
if (this.shouldUseResponsesEndpoint(body.model)) {
const response = await this.responsesNonStream(body, signal);
return responseToChatCompletion(response);
}
const response = await this.openai.chat.completions.create(
this.modifyChatBody(body),
{
@@ -111,6 +131,12 @@
body: ChatCompletionCreateParamsStreaming,
signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk, any, unknown> {
if (this.shouldUseResponsesEndpoint(body.model)) {
for await (const chunk of this.responsesStream(body, signal)) {
yield chunk;
}
return;
}
const response = await this.openai.chat.completions.create(
this.modifyChatBody(body),
{
@@ -209,4 +235,42 @@
async list(): Promise<Model[]> {
return (await this.openai.models.list()).data;
}

async responsesNonStream(
body: ChatCompletionCreateParamsNonStreaming,
signal: AbortSignal,
): Promise<Response> {
const params = toResponsesParams({
...(body as ChatCompletionCreateParams),
stream: false,
});
return (await this.openai.responses.create(params, {
signal,
})) as Response;
}

async *responsesStream(
body: ChatCompletionCreateParamsStreaming,
signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk> {
const params = toResponsesParams({
...(body as ChatCompletionCreateParams),
stream: true,
});

const state = createResponsesStreamState({
model: body.model,
});

const stream = this.openai.responses.stream(params as any, {
signal,
});

for await (const event of stream as AsyncIterable<ResponseStreamEvent>) {
const chunk = fromResponsesChunk(state, event);
if (chunk) {
yield chunk;
}
}
}
}
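A minimal consumption sketch of the new `responsesStream` method, using the signature added above. Constructing the `OpenAIApi` instance (from an `OpenAIConfigSchema` config) is outside this diff, so the `api` parameter is left abstract here:

```ts
// Consume responsesStream exactly like chatCompletionStream: each yielded
// value is a ChatCompletionChunk translated from a Responses API event.
async function printResponse(api: OpenAIApi): Promise<void> {
  const controller = new AbortController();
  for await (const chunk of api.responsesStream(
    {
      model: "gpt-5",
      stream: true,
      messages: [{ role: "user", content: "Summarize the latest changes" }],
    },
    controller.signal,
  )) {
    process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
  }
}
```

Callers that keep using `chatCompletionStream` get the same behavior implicitly, since it now forwards to `responsesStream` whenever `shouldUseResponsesEndpoint` matches.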
11 changes: 11 additions & 0 deletions packages/openai-adapters/src/apis/base.ts
@@ -10,6 +10,7 @@ import {
EmbeddingCreateParams,
Model,
} from "openai/resources/index";
import type { Response } from "openai/resources/responses/responses.js";

export interface FimCreateParamsStreaming
extends CompletionCreateParamsStreaming {
@@ -50,6 +51,16 @@ export interface BaseLlmApi {
signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk>;

responsesNonStream?(
body: ChatCompletionCreateParamsNonStreaming,
signal: AbortSignal,
): Promise<Response>;

responsesStream?(
body: ChatCompletionCreateParamsStreaming,
signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk>;

// Completion, no stream
completionNonStream(
body: CompletionCreateParamsNonStreaming,
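Because `responsesNonStream` and `responsesStream` are optional on `BaseLlmApi`, callers feature-detect before using them, as the exponentialBackoff change does. A sketch of that pattern (the helper name is illustrative, not part of the codebase):

```ts
// Prefer the Responses endpoint when the adapter implements it; otherwise
// fall back to the standard chat completions stream.
async function* streamWithBestEndpoint(
  api: BaseLlmApi,
  body: ChatCompletionCreateParamsStreaming,
  signal: AbortSignal,
): AsyncGenerator<ChatCompletionChunk> {
  if (typeof api.responsesStream === "function") {
    yield* api.responsesStream(body, signal);
  } else {
    yield* api.chatCompletionStream(body, signal);
  }
}
```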