
Commit 6703362

feat: impl Responses API in oai-adapters (#8417)
* feat: add GPT-5 Codex model support
  - Add GPT-5 Codex to llm-info package with 500k context and 150k max tokens
  - Add model definition to models.ts for UI configuration
  - Include GPT-5 Codex in OpenAI provider packages list
  - Model supports chat and edit roles with tool_use capability
* feat: impl Responses API in oai-adapters
* fix chat describer
* Update openai.ts
* fix llm-info
* address dallin's feedback
1 parent 20a8e6c commit 6703362

File tree

14 files changed: +1517 −5 lines changed


core/util/chatDescriber.test.ts

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ describe("ChatDescriber", () => {
    expect(result).toBeUndefined();
  });

-  it("should set completionOptions.maxTokens to 12", async () => {
+  it("should set completionOptions.maxTokens to 16", async () => {
    const message = "Test message";
    const completionOptions: LLMFullCompletionOptions = { temperature: 0.7 };

core/util/chatDescriber.ts

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ import { renderChatMessage } from "./messageContent";
import { convertFromUnifiedHistory } from "./messageConversion";

export class ChatDescriber {
-  static maxTokens = 12;
+  static maxTokens = 16; // Increased from 12 to meet GPT-5 minimum requirement
  static prompt: string | undefined =
    "Given the following... please reply with a title for the chat that is 3-4 words in length, all words used should be directly related to the content of the chat, avoid using verbs unless they are directly related to the content of the chat, no additional text or explanation, you don't need ending punctuation.\n\n";
  static messenger: IMessenger<ToCoreProtocol, FromCoreProtocol>;

extensions/cli/package-lock.json

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default.

extensions/cli/src/stream/streamChatResponse.test.ts

Lines changed: 129 additions & 0 deletions
@@ -252,6 +252,135 @@ describe("processStreamingResponse - content preservation", () => {
    expect(result.finalContent).toBe("Hello world!");
  });

+  it("routes gpt-5 models through responsesStream and preserves streaming tool updates", async () => {
+    const gpt5Chunks: ChatCompletionChunk[] = [
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: { role: "assistant" },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: { content: "Analyzing repository…" },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: {
+              tool_calls: [
+                {
+                  index: 0,
+                  id: "call_final",
+                  type: "function",
+                  function: {
+                    name: "searchDocs",
+                    arguments: '{"query":"unit',
+                  },
+                },
+              ],
+            },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: {
+              tool_calls: [
+                {
+                  index: 0,
+                  type: "function",
+                  function: {
+                    arguments: ' tests"}',
+                  },
+                },
+              ],
+            },
+            finish_reason: null,
+          },
+        ],
+      },
+      {
+        id: "resp_gpt5",
+        object: "chat.completion.chunk",
+        created: Date.now(),
+        model: "gpt-5",
+        choices: [
+          {
+            index: 0,
+            delta: {},
+            finish_reason: "tool_calls",
+          },
+        ],
+      },
+    ];
+
+    const responsesStream = vi.fn().mockImplementation(async function* () {
+      for (const chunk of gpt5Chunks) {
+        yield chunk;
+      }
+    });
+    const chatCompletionStream = vi.fn().mockImplementation(async function* () {
+      throw new Error("chatCompletionStream should not be used for gpt-5");
+    });
+
+    mockLlmApi = {
+      responsesStream,
+      chatCompletionStream,
+    } as unknown as BaseLlmApi;
+
+    mockModel = {
+      model: "gpt-5-preview",
+      provider: "openai",
+    } as unknown as ModelConfig;
+
+    const result = await processStreamingResponse({
+      chatHistory,
+      model: mockModel,
+      llmApi: mockLlmApi,
+      abortController: mockAbortController,
+    });
+
+    expect(responsesStream).toHaveBeenCalledTimes(1);
+    expect(chatCompletionStream).not.toHaveBeenCalled();
+    expect(result.content).toBe("Analyzing repository…");
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls[0]).toMatchObject({
+      id: "call_final",
+      name: "searchDocs",
+      arguments: { query: "unit tests" },
+    });
+    expect(result.shouldContinue).toBe(true);
+  });
+
  it("handles provider that only sends tool ID in first chunk then uses index", async () => {
    chunks = [
      contentChunk("I'll read the README.md file for you and then say hello!"),
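For reference, the assertions in the new test imply the shape of the value processStreamingResponse resolves with. The sketch below is inferred from those expectations only; the actual type in the CLI codebase is not shown in this commit and may differ.

// Inferred from the test assertions above; hypothetical, may not match the
// real type in extensions/cli.
interface StreamingResponseResult {
  content: string; // concatenated assistant text deltas ("Analyzing repository…")
  toolCalls: Array<{
    id: string; // "call_final"
    name: string; // "searchDocs"
    arguments: Record<string, unknown>; // streamed JSON fragments, parsed: { query: "unit tests" }
  }>;
  shouldContinue: boolean; // true when the turn ends with finish_reason "tool_calls"
}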

extensions/cli/src/util/exponentialBackoff.ts

Lines changed: 17 additions & 1 deletion
@@ -1,4 +1,4 @@
-import { BaseLlmApi } from "@continuedev/openai-adapters";
+import { BaseLlmApi, isResponsesModel } from "@continuedev/openai-adapters";
import type { ChatCompletionCreateParamsStreaming } from "openai/resources.mjs";

import { error, warn } from "../logging.js";
@@ -173,6 +173,14 @@ export async function chatCompletionStreamWithBackoff(
      throw new Error("Request aborted");
    }

+    const useResponses =
+      typeof llmApi.responsesStream === "function" &&
+      isResponsesModel(params.model);
+
+    if (useResponses) {
+      return llmApi.responsesStream!(params, abortSignal);
+    }
+
    return llmApi.chatCompletionStream(params, abortSignal);
  } catch (err: any) {
    lastError = err;
@@ -189,6 +197,14 @@ export async function chatCompletionStreamWithBackoff(

    // Only retry if the error is retryable
    if (!isRetryableError(err)) {
+      // Log full error details for non-retryable errors
+      logger.error("Non-retryable LLM API error", err, {
+        status: err.status,
+        statusText: err.statusText,
+        message: err.message,
+        error: err.error,
+        model: params.model,
+      });
      throw err;
    }
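The isResponsesModel predicate imported here comes from @continuedev/openai-adapters; its body is not part of this commit view. As a rough sketch of the assumption the routing relies on (the new test routes "gpt-5-preview" through responsesStream, which suggests a prefix match rather than an exact-name check), it could look roughly like the following — hypothetical, not the package's actual code.

// Hypothetical sketch only: the real isResponsesModel lives in
// @continuedev/openai-adapters and is not shown in this commit.
export function isResponsesModel(model: string): boolean {
  // "gpt-5", "gpt-5-codex", "gpt-5-preview" would all route to /responses
  return /^gpt-5/.test(model);
}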

gui/src/pages/AddNewModel/configs/models.ts

Lines changed: 13 additions & 0 deletions
@@ -1079,6 +1079,19 @@ export const models: { [key: string]: ModelPackage } = {
    icon: "openai.png",
    isOpenSource: false,
  },
+  gpt5Codex: {
+    title: "GPT-5 Codex",
+    description:
+      "OpenAI's most advanced code generation model, optimized for programming tasks",
+    params: {
+      model: "gpt-5-codex",
+      contextLength: 400_000,
+      title: "GPT-5 Codex",
+    },
+    providerOptions: ["openai"],
+    icon: "openai.png",
+    isOpenSource: false,
+  },
  gpt4turbo: {
    title: "GPT-4 Turbo",
    description:

gui/src/pages/AddNewModel/configs/providers.ts

Lines changed: 1 addition & 0 deletions
@@ -118,6 +118,7 @@ export const providers: Partial<Record<string, ProviderInfo>> = {
    tags: [ModelProviderTags.RequiresApiKey],
    packages: [
      models.gpt5,
+      models.gpt5Codex,
      models.gpt4o,
      models.gpt4omini,
      models.gpt4turbo,

packages/config-yaml/src/index.ts

Lines changed: 1 addition & 0 deletions
@@ -1,2 +1,3 @@
export * from "./browser.js";
export * from "./registryClient.js";
+export { parseAgentFileRules } from "./markdown/agentFiles.js";

packages/llm-info/src/providers/openai.ts

Lines changed: 9 additions & 1 deletion
@@ -79,9 +79,17 @@ export const OpenAi: ModelProvider = {
    {
      model: "gpt-5",
      displayName: "GPT-5",
+      contextLength: 128000,
+      maxCompletionTokens: 16384,
+      regex: /^gpt-5$/,
+      recommendedFor: ["chat"],
+    },
+    {
+      model: "gpt-5-codex",
+      displayName: "GPT-5 Codex",
      contextLength: 400000,
      maxCompletionTokens: 128000,
-      regex: /gpt-5/,
+      regex: /gpt-5-codex/,
      recommendedFor: ["chat"],
    },
    // gpt-4o
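The regex change matters because model info is matched by pattern: with the old /gpt-5/ regex, "gpt-5-codex" would also have matched the base GPT-5 entry. Below is a minimal lookup sketch assuming a find-first-match strategy; the findModelInfo helper is illustrative only and not part of llm-info.

// Illustrative only; llm-info's actual lookup logic is not part of this diff.
interface ModelInfo {
  model: string;
  regex?: RegExp;
  contextLength?: number;
  maxCompletionTokens?: number;
}

function findModelInfo(models: ModelInfo[], name: string): ModelInfo | undefined {
  return models.find((m) => (m.regex ? m.regex.test(name) : m.model === name));
}

// With the anchored patterns above:
//   "gpt-5"       -> /^gpt-5$/      (128000 context, 16384 max completion)
//   "gpt-5-codex" -> /gpt-5-codex/  (400000 context, 128000 max completion)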

packages/openai-adapters/src/apis/OpenAI.ts

Lines changed: 64 additions & 0 deletions
@@ -11,9 +11,20 @@ import {
  CompletionCreateParamsStreaming,
  Model,
} from "openai/resources/index";
+import type {
+  Response,
+  ResponseStreamEvent,
+} from "openai/resources/responses/responses.js";
import { z } from "zod";
import { OpenAIConfigSchema } from "../types.js";
import { customFetch } from "../util.js";
+import {
+  createResponsesStreamState,
+  fromResponsesChunk,
+  isResponsesModel,
+  responseToChatCompletion,
+  toResponsesParams,
+} from "./openaiResponses.js";
import {
  BaseLlmApi,
  CreateRerankResponse,
@@ -63,6 +74,11 @@ export class OpenAIApi implements BaseLlmApi {
    return body;
  }

+  protected shouldUseResponsesEndpoint(model: string): boolean {
+    const isOfficialOpenAIAPI = this.apiBase === "https://api.openai.com/v1/";
+    return isOfficialOpenAIAPI && isResponsesModel(model);
+  }
+
  modifyCompletionBody<
    T extends
      | CompletionCreateParamsNonStreaming
@@ -98,6 +114,10 @@ export class OpenAIApi implements BaseLlmApi {
    body: ChatCompletionCreateParamsNonStreaming,
    signal: AbortSignal,
  ): Promise<ChatCompletion> {
+    if (this.shouldUseResponsesEndpoint(body.model)) {
+      const response = await this.responsesNonStream(body, signal);
+      return responseToChatCompletion(response);
+    }
    const response = await this.openai.chat.completions.create(
      this.modifyChatBody(body),
      {
@@ -111,6 +131,12 @@ export class OpenAIApi implements BaseLlmApi {
    body: ChatCompletionCreateParamsStreaming,
    signal: AbortSignal,
  ): AsyncGenerator<ChatCompletionChunk, any, unknown> {
+    if (this.shouldUseResponsesEndpoint(body.model)) {
+      for await (const chunk of this.responsesStream(body, signal)) {
+        yield chunk;
+      }
+      return;
+    }
    const response = await this.openai.chat.completions.create(
      this.modifyChatBody(body),
      {
@@ -209,4 +235,42 @@ export class OpenAIApi implements BaseLlmApi {
  async list(): Promise<Model[]> {
    return (await this.openai.models.list()).data;
  }
+
+  async responsesNonStream(
+    body: ChatCompletionCreateParamsNonStreaming,
+    signal: AbortSignal,
+  ): Promise<Response> {
+    const params = toResponsesParams({
+      ...(body as ChatCompletionCreateParams),
+      stream: false,
+    });
+    return (await this.openai.responses.create(params, {
+      signal,
+    })) as Response;
+  }
+
+  async *responsesStream(
+    body: ChatCompletionCreateParamsStreaming,
+    signal: AbortSignal,
+  ): AsyncGenerator<ChatCompletionChunk> {
+    const params = toResponsesParams({
+      ...(body as ChatCompletionCreateParams),
+      stream: true,
+    });
+
+    const state = createResponsesStreamState({
+      model: body.model,
+    });
+
+    const stream = this.openai.responses.stream(params as any, {
+      signal,
+    });
+
+    for await (const event of stream as AsyncIterable<ResponseStreamEvent>) {
+      const chunk = fromResponsesChunk(state, event);
+      if (chunk) {
+        yield chunk;
+      }
+    }
+  }
}
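From a caller's perspective nothing changes: chatCompletionStream still yields ChatCompletionChunk objects, and the adapter decides internally whether to hit /chat/completions or /responses. Below is a minimal consumer-side sketch; how OpenAIApi is constructed and whether it is re-exported from the package root are assumptions not shown in this diff.

// Sketch of consumer-side streaming; assumes an already-constructed OpenAIApi.
// The import path and setup here are assumptions for illustration.
import type { OpenAIApi } from "@continuedev/openai-adapters";

async function streamAnswer(api: OpenAIApi, signal: AbortSignal): Promise<void> {
  const body = {
    model: "gpt-5-codex",
    messages: [{ role: "user" as const, content: "Summarize this repo" }],
    stream: true as const,
  };

  // For gpt-5* models against https://api.openai.com/v1/, this delegates to
  // responsesStream; every other model still goes through chat.completions.
  for await (const chunk of api.chatCompletionStream(body, signal)) {
    const delta = chunk.choices[0]?.delta;
    if (delta?.content) {
      process.stdout.write(delta.content);
    }
  }
}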
