diff --git a/core/util/chatDescriber.test.ts b/core/util/chatDescriber.test.ts index 315cb3fea9a..64ba683143e 100644 --- a/core/util/chatDescriber.test.ts +++ b/core/util/chatDescriber.test.ts @@ -30,7 +30,7 @@ describe("ChatDescriber", () => { expect(result).toBeUndefined(); }); - it("should set completionOptions.maxTokens to 12", async () => { + it("should set completionOptions.maxTokens to 16", async () => { const message = "Test message"; const completionOptions: LLMFullCompletionOptions = { temperature: 0.7 }; diff --git a/core/util/chatDescriber.ts b/core/util/chatDescriber.ts index 873d073272c..33a2b31baca 100644 --- a/core/util/chatDescriber.ts +++ b/core/util/chatDescriber.ts @@ -8,7 +8,7 @@ import { renderChatMessage } from "./messageContent"; import { convertFromUnifiedHistory } from "./messageConversion"; export class ChatDescriber { - static maxTokens = 12; + static maxTokens = 16; // Increased from 12 to meet GPT-5 minimum requirement static prompt: string | undefined = "Given the following... please reply with a title for the chat that is 3-4 words in length, all words used should be directly related to the content of the chat, avoid using verbs unless they are directly related to the content of the chat, no additional text or explanation, you don't need ending punctuation.\n\n"; static messenger: IMessenger; diff --git a/extensions/cli/package-lock.json b/extensions/cli/package-lock.json index 5ee422523d2..e645f43949d 100644 --- a/extensions/cli/package-lock.json +++ b/extensions/cli/package-lock.json @@ -184,6 +184,7 @@ "system-ca": "^1.0.3", "tar": "^7.4.3", "tree-sitter-wasms": "^0.1.11", + "untildify": "^6.0.0", "uuid": "^9.0.1", "vectordb": "^0.4.20", "web-tree-sitter": "^0.21.0", @@ -274,7 +275,7 @@ "@aws-sdk/credential-providers": "^3.840.0", "@continuedev/config-types": "^1.0.14", "@continuedev/config-yaml": "^1.14.0", - "@continuedev/fetch": "^1.1.0", + "@continuedev/fetch": "^1.5.0", "dotenv": "^16.5.0", "google-auth-library": "^10.1.0", "json-schema": "^0.4.0", diff --git a/extensions/cli/src/stream/streamChatResponse.test.ts b/extensions/cli/src/stream/streamChatResponse.test.ts index ccff36d0f61..685bf04d546 100644 --- a/extensions/cli/src/stream/streamChatResponse.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.test.ts @@ -252,6 +252,135 @@ describe("processStreamingResponse - content preservation", () => { expect(result.finalContent).toBe("Hello world!"); }); + it("routes gpt-5 models through responsesStream and preserves streaming tool updates", async () => { + const gpt5Chunks: ChatCompletionChunk[] = [ + { + id: "resp_gpt5", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-5", + choices: [ + { + index: 0, + delta: { role: "assistant" }, + finish_reason: null, + }, + ], + }, + { + id: "resp_gpt5", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-5", + choices: [ + { + index: 0, + delta: { content: "Analyzing repository…" }, + finish_reason: null, + }, + ], + }, + { + id: "resp_gpt5", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-5", + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: "call_final", + type: "function", + function: { + name: "searchDocs", + arguments: '{"query":"unit', + }, + }, + ], + }, + finish_reason: null, + }, + ], + }, + { + id: "resp_gpt5", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-5", + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + type: "function", + function: { + 
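+ // Continuation of the same tool call: only the index is repeated, the id is omitted.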
arguments: ' tests"}', + }, + }, + ], + }, + finish_reason: null, + }, + ], + }, + { + id: "resp_gpt5", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-5", + choices: [ + { + index: 0, + delta: {}, + finish_reason: "tool_calls", + }, + ], + }, + ]; + + const responsesStream = vi.fn().mockImplementation(async function* () { + for (const chunk of gpt5Chunks) { + yield chunk; + } + }); + const chatCompletionStream = vi.fn().mockImplementation(async function* () { + throw new Error("chatCompletionStream should not be used for gpt-5"); + }); + + mockLlmApi = { + responsesStream, + chatCompletionStream, + } as unknown as BaseLlmApi; + + mockModel = { + model: "gpt-5-preview", + provider: "openai", + } as unknown as ModelConfig; + + const result = await processStreamingResponse({ + chatHistory, + model: mockModel, + llmApi: mockLlmApi, + abortController: mockAbortController, + }); + + expect(responsesStream).toHaveBeenCalledTimes(1); + expect(chatCompletionStream).not.toHaveBeenCalled(); + expect(result.content).toBe("Analyzing repository…"); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls[0]).toMatchObject({ + id: "call_final", + name: "searchDocs", + arguments: { query: "unit tests" }, + }); + expect(result.shouldContinue).toBe(true); + }); + it("handles provider that only sends tool ID in first chunk then uses index", async () => { chunks = [ contentChunk("I'll read the README.md file for you and then say hello!"), diff --git a/extensions/cli/src/util/exponentialBackoff.ts b/extensions/cli/src/util/exponentialBackoff.ts index 926049efa64..c44aee6e818 100644 --- a/extensions/cli/src/util/exponentialBackoff.ts +++ b/extensions/cli/src/util/exponentialBackoff.ts @@ -1,4 +1,4 @@ -import { BaseLlmApi } from "@continuedev/openai-adapters"; +import { BaseLlmApi, isResponsesModel } from "@continuedev/openai-adapters"; import type { ChatCompletionCreateParamsStreaming } from "openai/resources.mjs"; import { error, warn } from "../logging.js"; @@ -173,6 +173,14 @@ export async function chatCompletionStreamWithBackoff( throw new Error("Request aborted"); } + const useResponses = + typeof llmApi.responsesStream === "function" && + isResponsesModel(params.model); + + if (useResponses) { + return llmApi.responsesStream!(params, abortSignal); + } + return llmApi.chatCompletionStream(params, abortSignal); } catch (err: any) { lastError = err; @@ -189,6 +197,14 @@ export async function chatCompletionStreamWithBackoff( // Only retry if the error is retryable if (!isRetryableError(err)) { + // Log full error details for non-retryable errors + logger.error("Non-retryable LLM API error", err, { + status: err.status, + statusText: err.statusText, + message: err.message, + error: err.error, + model: params.model, + }); throw err; } diff --git a/gui/src/pages/AddNewModel/configs/models.ts b/gui/src/pages/AddNewModel/configs/models.ts index b470389e43e..941b0be46e6 100644 --- a/gui/src/pages/AddNewModel/configs/models.ts +++ b/gui/src/pages/AddNewModel/configs/models.ts @@ -1079,6 +1079,19 @@ export const models: { [key: string]: ModelPackage } = { icon: "openai.png", isOpenSource: false, }, + gpt5Codex: { + title: "GPT-5 Codex", + description: + "OpenAI's most advanced code generation model, optimized for programming tasks", + params: { + model: "gpt-5-codex", + contextLength: 400_000, + title: "GPT-5 Codex", + }, + providerOptions: ["openai"], + icon: "openai.png", + isOpenSource: false, + }, gpt4turbo: { title: "GPT-4 Turbo", description: diff --git 
a/gui/src/pages/AddNewModel/configs/providers.ts b/gui/src/pages/AddNewModel/configs/providers.ts index 88e091ce775..1bab7abeb81 100644 --- a/gui/src/pages/AddNewModel/configs/providers.ts +++ b/gui/src/pages/AddNewModel/configs/providers.ts @@ -118,6 +118,7 @@ export const providers: Partial> = { tags: [ModelProviderTags.RequiresApiKey], packages: [ models.gpt5, + models.gpt5Codex, models.gpt4o, models.gpt4omini, models.gpt4turbo, diff --git a/packages/config-yaml/src/index.ts b/packages/config-yaml/src/index.ts index c87d689eecb..b14b455a70e 100644 --- a/packages/config-yaml/src/index.ts +++ b/packages/config-yaml/src/index.ts @@ -1,2 +1,3 @@ export * from "./browser.js"; export * from "./registryClient.js"; +export { parseAgentFileRules } from "./markdown/agentFiles.js"; diff --git a/packages/llm-info/src/providers/openai.ts b/packages/llm-info/src/providers/openai.ts index 82055930869..730495e9c02 100644 --- a/packages/llm-info/src/providers/openai.ts +++ b/packages/llm-info/src/providers/openai.ts @@ -79,9 +79,17 @@ export const OpenAi: ModelProvider = { { model: "gpt-5", displayName: "GPT-5", + contextLength: 128000, + maxCompletionTokens: 16384, + regex: /^gpt-5$/, + recommendedFor: ["chat"], + }, + { + model: "gpt-5-codex", + displayName: "GPT-5 Codex", contextLength: 400000, maxCompletionTokens: 128000, - regex: /gpt-5/, + regex: /gpt-5-codex/, recommendedFor: ["chat"], }, // gpt-4o diff --git a/packages/openai-adapters/src/apis/OpenAI.ts b/packages/openai-adapters/src/apis/OpenAI.ts index 86452706ad8..4d154ea9708 100644 --- a/packages/openai-adapters/src/apis/OpenAI.ts +++ b/packages/openai-adapters/src/apis/OpenAI.ts @@ -11,9 +11,20 @@ import { CompletionCreateParamsStreaming, Model, } from "openai/resources/index"; +import type { + Response, + ResponseStreamEvent, +} from "openai/resources/responses/responses.js"; import { z } from "zod"; import { OpenAIConfigSchema } from "../types.js"; import { customFetch } from "../util.js"; +import { + createResponsesStreamState, + fromResponsesChunk, + isResponsesModel, + responseToChatCompletion, + toResponsesParams, +} from "./openaiResponses.js"; import { BaseLlmApi, CreateRerankResponse, @@ -63,6 +74,11 @@ export class OpenAIApi implements BaseLlmApi { return body; } + protected shouldUseResponsesEndpoint(model: string): boolean { + const isOfficialOpenAIAPI = this.apiBase === "https://api.openai.com/v1/"; + return isOfficialOpenAIAPI && isResponsesModel(model); + } + modifyCompletionBody< T extends | CompletionCreateParamsNonStreaming @@ -98,6 +114,10 @@ export class OpenAIApi implements BaseLlmApi { body: ChatCompletionCreateParamsNonStreaming, signal: AbortSignal, ): Promise { + if (this.shouldUseResponsesEndpoint(body.model)) { + const response = await this.responsesNonStream(body, signal); + return responseToChatCompletion(response); + } const response = await this.openai.chat.completions.create( this.modifyChatBody(body), { @@ -111,6 +131,12 @@ export class OpenAIApi implements BaseLlmApi { body: ChatCompletionCreateParamsStreaming, signal: AbortSignal, ): AsyncGenerator { + if (this.shouldUseResponsesEndpoint(body.model)) { + for await (const chunk of this.responsesStream(body, signal)) { + yield chunk; + } + return; + } const response = await this.openai.chat.completions.create( this.modifyChatBody(body), { @@ -209,4 +235,42 @@ export class OpenAIApi implements BaseLlmApi { async list(): Promise { return (await this.openai.models.list()).data; } + + async responsesNonStream( + body: ChatCompletionCreateParamsNonStreaming, + 
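+ // The abort signal is forwarded to the OpenAI SDK request options below.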
signal: AbortSignal, + ): Promise { + const params = toResponsesParams({ + ...(body as ChatCompletionCreateParams), + stream: false, + }); + return (await this.openai.responses.create(params, { + signal, + })) as Response; + } + + async *responsesStream( + body: ChatCompletionCreateParamsStreaming, + signal: AbortSignal, + ): AsyncGenerator { + const params = toResponsesParams({ + ...(body as ChatCompletionCreateParams), + stream: true, + }); + + const state = createResponsesStreamState({ + model: body.model, + }); + + const stream = this.openai.responses.stream(params as any, { + signal, + }); + + for await (const event of stream as AsyncIterable) { + const chunk = fromResponsesChunk(state, event); + if (chunk) { + yield chunk; + } + } + } } diff --git a/packages/openai-adapters/src/apis/base.ts b/packages/openai-adapters/src/apis/base.ts index 5079aedef7d..cf5904d51c7 100644 --- a/packages/openai-adapters/src/apis/base.ts +++ b/packages/openai-adapters/src/apis/base.ts @@ -10,6 +10,7 @@ import { EmbeddingCreateParams, Model, } from "openai/resources/index"; +import type { Response } from "openai/resources/responses/responses.js"; export interface FimCreateParamsStreaming extends CompletionCreateParamsStreaming { @@ -50,6 +51,16 @@ export interface BaseLlmApi { signal: AbortSignal, ): AsyncGenerator; + responsesNonStream?( + body: ChatCompletionCreateParamsNonStreaming, + signal: AbortSignal, + ): Promise; + + responsesStream?( + body: ChatCompletionCreateParamsStreaming, + signal: AbortSignal, + ): AsyncGenerator; + // Completion, no stream completionNonStream( body: CompletionCreateParamsNonStreaming, diff --git a/packages/openai-adapters/src/apis/openaiResponses.ts b/packages/openai-adapters/src/apis/openaiResponses.ts new file mode 100644 index 00000000000..997651be460 --- /dev/null +++ b/packages/openai-adapters/src/apis/openaiResponses.ts @@ -0,0 +1,779 @@ +import type { CompletionUsage } from "openai/resources/index.js"; +import { + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionChunk, + ChatCompletionContentPart, + ChatCompletionContentPartImage, + ChatCompletionContentPartInputAudio, + ChatCompletionContentPartRefusal, + ChatCompletionContentPartText, + ChatCompletionCreateParams, + ChatCompletionCreateParamsStreaming, + ChatCompletionMessageParam, + ChatCompletionMessageToolCall, + ChatCompletionTool, +} from "openai/resources/index.js"; +import { + Response, + ResponseCreateParams, + ResponseFunctionCallArgumentsDoneEvent, + ResponseIncompleteEvent, + ResponseInput, + ResponseInputAudio, + ResponseInputContent, + ResponseInputFile, + ResponseInputImage, + ResponseInputText, + ResponseOutputItem, + ResponseOutputMessage, + ResponseOutputRefusal, + ResponseOutputText, + ResponseReasoningSummaryTextDeltaEvent, + ResponseStreamEvent, + ResponseUsage, +} from "openai/resources/responses/responses.js"; + +const RESPONSES_MODEL_REGEX = /^(?:gpt-5|gpt-5-codex|o)/i; + +export function isResponsesModel(model: string): boolean { + return !!model && RESPONSES_MODEL_REGEX.test(model); +} + +function convertTextPart(text: string): ResponseInputText { + return { + text, + type: "input_text", + }; +} + +function convertImagePart( + image: ChatCompletionContentPartImage, +): ResponseInputImage { + const converted: ResponseInputImage = { + type: "input_image", + image_url: image.image_url.url, + detail: image.image_url.detail ?? 
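+ // Fall back to "auto" when the chat image part omits a detail level.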
"auto", + }; + if ((image.image_url as any).file_id) { + (converted as any).file_id = (image.image_url as any).file_id; + } + return converted; +} + +function convertAudioPart( + part: ChatCompletionContentPartInputAudio, +): ResponseInputAudio { + return { + type: "input_audio", + input_audio: { + data: part.input_audio.data, + format: part.input_audio.format, + }, + }; +} + +function convertFilePart( + part: ChatCompletionContentPart.File, +): ResponseInputFile { + return { + type: "input_file", + file_id: part.file.file_id ?? undefined, + file_data: part.file.file_data ?? undefined, + filename: part.file.filename ?? undefined, + file_url: (part.file as any).file_url ?? undefined, + }; +} + +function convertMessageContentPart( + part: ChatCompletionContentPart | ChatCompletionContentPartRefusal, +): ResponseInputContent | undefined { + switch (part.type) { + case "text": + return convertTextPart(part.text); + case "image_url": + return convertImagePart(part); + case "input_audio": + return convertAudioPart(part); + case "file": + return convertFilePart(part); + case "refusal": + // Skip refusal parts - they're not input content + return undefined; + default: + return undefined; + } +} + +function collectMessageContentParts( + content: ChatCompletionMessageParam["content"], +): ResponseInputContent[] { + if (typeof content === "string") { + return [convertTextPart(content)]; + } + if (!Array.isArray(content)) { + return []; + } + + const parts: ResponseInputContent[] = []; + for (const part of content) { + const converted = convertMessageContentPart(part); + if (!converted) { + continue; + } + parts.push(converted); + } + return parts; +} + +type AssistantContentPart = ResponseOutputText | ResponseOutputRefusal; + +function createOutputTextPart( + text: string, + source?: Partial, +): AssistantContentPart { + const annotations = + Array.isArray(source?.annotations) && source.annotations.length > 0 + ? 
source.annotations + : []; + const part: ResponseOutputText = { + text, + type: "output_text", + annotations, + }; + if (Array.isArray(source?.logprobs) && source.logprobs.length > 0) { + part.logprobs = source.logprobs; + } + return part; +} + +function createRefusalPart(refusal: string): AssistantContentPart { + return { + refusal, + type: "refusal", + }; +} + +function collectAssistantContentParts( + content: ChatCompletionMessageParam["content"], + refusal?: string | null, +): AssistantContentPart[] { + const parts: AssistantContentPart[] = []; + + if (typeof content === "string") { + if (content.trim().length > 0) { + parts.push(createOutputTextPart(content)); + } + } else if (Array.isArray(content)) { + for (const rawPart of content) { + // Content array should be ChatCompletionContentPartText | ChatCompletionContentPartRefusal + // but we handle "output_text" type which may come from Response API conversions + const part = rawPart as + | ChatCompletionContentPartText + | ChatCompletionContentPartRefusal + | { type: "output_text"; text: string }; + if (!part) { + continue; + } + + const partType = part.type; + if (partType === "text") { + const textPart = part as ChatCompletionContentPartText; + if ( + typeof textPart.text === "string" && + textPart.text.trim().length > 0 + ) { + parts.push(createOutputTextPart(textPart.text)); + } + } else if (partType === "output_text") { + const textValue = (part as { type: "output_text"; text: string }).text; + if (typeof textValue === "string" && textValue.trim().length > 0) { + parts.push(createOutputTextPart(textValue)); + } + } else if (partType === "refusal") { + const refusalPart = part as ChatCompletionContentPartRefusal; + const refusalText = refusalPart.refusal; + if (typeof refusalText === "string" && refusalText.trim().length > 0) { + parts.push(createRefusalPart(refusalText)); + } + } + } + } + + if (typeof refusal === "string" && refusal.trim().length > 0) { + parts.push(createRefusalPart(refusal)); + } + + return parts; +} + +function extractToolResultContent( + content: ChatCompletionMessageParam["content"], +): string { + if (typeof content === "string") { + return content; + } + if (!Array.isArray(content)) { + return ""; + } + + return content + .map((part) => { + if (part.type === "text") { + return part.text; + } + return ""; + }) + .join(""); +} + +function convertTools( + tools?: ChatCompletionTool[] | null, + legacyFunctions?: ChatCompletionCreateParams["functions"], +): ResponseCreateParams["tools"] | undefined { + if (tools?.length) { + return tools.map((tool) => { + if (tool.type === "function") { + return { + type: "function" as const, + name: tool.function.name, + description: tool.function.description ?? null, + parameters: tool.function.parameters ?? null, + strict: + tool.function.strict !== undefined ? tool.function.strict : null, + }; + } + return tool as any; + }); + } + + if (legacyFunctions?.length) { + return legacyFunctions.map((fn) => ({ + type: "function" as const, + name: fn.name, + description: fn.description ?? null, + parameters: fn.parameters ?? 
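+ // Mirror the tools branch above: absent parameter schemas are sent as null.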
null, + strict: null, + })); + } + + return undefined; +} + +function resolveToolChoice( + params: ChatCompletionCreateParams, +): ResponseCreateParams["tool_choice"] | undefined { + if (params.tool_choice) { + return params.tool_choice as any; + } + if (params.function_call) { + if (typeof params.function_call === "string") { + if (params.function_call === "none") { + return "none"; + } + if (params.function_call === "auto") { + return "auto"; + } + } else if (params.function_call?.name) { + return { + type: "function", + name: params.function_call.name, + }; + } + } + return undefined; +} + +export function toResponsesInput( + messages: ChatCompletionMessageParam[], +): ResponseInput { + const inputItems: ResponseInput = []; + let assistantMessageCounter = 0; + + for (const message of messages) { + if (message.role === "tool") { + if (!message.tool_call_id) { + continue; + } + const rawContent = extractToolResultContent(message.content); + inputItems.push({ + type: "function_call_output", + call_id: message.tool_call_id, + output: rawContent, + }); + continue; + } + + if (message.role === "system" || message.role === "developer") { + const contentParts = collectMessageContentParts(message.content); + if (contentParts.length === 0) { + continue; + } + inputItems.push({ + type: "message", + role: "developer", + content: contentParts, + }); + continue; + } + + if (message.role === "user") { + const contentParts = collectMessageContentParts(message.content); + if (contentParts.length === 0) { + continue; + } + inputItems.push({ + type: "message", + role: "user", + content: contentParts, + }); + continue; + } + + if (message.role === "assistant") { + const assistantMessage = message as ChatCompletionAssistantMessageParam; + const assistantContentParts = collectAssistantContentParts( + assistantMessage.content, + assistantMessage.refusal ?? null, + ); + if (assistantContentParts.length > 0) { + const providedId = (message as any).id; + const assistantId = + typeof providedId === "string" && providedId.startsWith("msg_") + ? providedId + : `msg_${(assistantMessageCounter++).toString().padStart(4, "0")}`; + inputItems.push({ + type: "message", + role: "assistant", + content: assistantContentParts, + id: assistantId, + status: "completed", + } as ResponseOutputMessage as any); + } + if (assistantMessage.tool_calls?.length) { + assistantMessage.tool_calls.forEach((toolCall, index) => { + if (toolCall.type === "function") { + const callId = toolCall.id ?? `tool_call_${index}`; + const functionCall: any = { + type: "function_call", + call_id: callId, + name: toolCall.function.name ?? "", + arguments: toolCall.function.arguments ?? "{}", + }; + if ( + typeof toolCall.id === "string" && + toolCall.id.startsWith("fc_") + ) { + functionCall.id = toolCall.id; + } + inputItems.push(functionCall); + } + }); + } + continue; + } + } + + return inputItems; +} + +export function toResponsesParams( + params: ChatCompletionCreateParams, +): ResponseCreateParams { + const input = toResponsesInput(params.messages); + + const responsesParams: ResponseCreateParams = { + model: params.model, + input, + stream: + (params as ChatCompletionCreateParamsStreaming).stream === true + ? 
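+ // Coerce stream to a strict boolean for the Responses payload.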
true + : false, + tool_choice: resolveToolChoice(params), + tools: convertTools(params.tools, params.functions), + }; + + if (params.temperature !== undefined && params.temperature !== null) { + responsesParams.temperature = params.temperature; + } + if (params.top_p !== undefined && params.top_p !== null) { + responsesParams.top_p = params.top_p; + } + if (params.metadata !== undefined) { + responsesParams.metadata = params.metadata ?? null; + } + if (params.prompt_cache_key !== undefined) { + responsesParams.prompt_cache_key = params.prompt_cache_key; + } + const maxOutputTokens = + params.max_completion_tokens ?? params.max_tokens ?? null; + if (maxOutputTokens !== null) { + responsesParams.max_output_tokens = maxOutputTokens; + } + if (params.parallel_tool_calls !== undefined) { + responsesParams.parallel_tool_calls = params.parallel_tool_calls; + } else if (params.tools?.length) { + responsesParams.parallel_tool_calls = false; + } + if (params.reasoning_effort) { + responsesParams.reasoning = { + effort: params.reasoning_effort, + }; + } + + // Remove undefined properties to avoid overriding server defaults + Object.keys(responsesParams).forEach((key) => { + const typedKey = key as keyof ResponseCreateParams; + if (responsesParams[typedKey] === undefined) { + delete responsesParams[typedKey]; + } + }); + + return responsesParams; +} + +function mapUsage(usage?: ResponseUsage | null): CompletionUsage | undefined { + if (!usage) { + return undefined; + } + + const mapped: CompletionUsage = { + completion_tokens: usage.output_tokens, + prompt_tokens: usage.input_tokens, + total_tokens: usage.total_tokens, + }; + + return mapped; +} + +interface ToolCallState { + id: string; + callId: string; + index: number; + name?: string; + arguments: string; +} + +interface MessageState { + content: string; + refusal: string | null; +} + +export interface ResponsesStreamState { + context: { + id?: string; + model: string; + created?: number; + pendingFinish?: ChatCompletionChunk.Choice["finish_reason"]; + }; + messages: Map; + toolCalls: Map; + indexToToolCallId: Map; +} + +export function createResponsesStreamState(context: { + model: string; + responseId?: string; + created?: number; +}): ResponsesStreamState { + return { + context: { + id: context.responseId, + model: context.model, + created: context.created, + pendingFinish: null, + }, + messages: new Map(), + toolCalls: new Map(), + indexToToolCallId: new Map(), + }; +} + +function buildChunk( + state: ResponsesStreamState, + delta: Partial = {}, + finishReason: ChatCompletionChunk.Choice["finish_reason"] = null, + usage?: CompletionUsage, + options?: { includeChoices?: boolean }, +): ChatCompletionChunk { + const includeChoices = options?.includeChoices ?? true; + const created = state.context.created ?? Math.floor(Date.now() / 1000); + const id = state.context.id ?? ""; + + const chunk: ChatCompletionChunk = { + id, + object: "chat.completion.chunk", + created, + model: state.context.model, + choices: includeChoices + ? 
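+ // Normal delta chunks carry a single choice; usage-only chunks get an empty array.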
[ + { + index: 0, + delta: delta as ChatCompletionChunk.Choice["delta"], + finish_reason: finishReason, + logprobs: null, + }, + ] + : [], + }; + + if (usage) { + chunk.usage = usage; + } + + return chunk; +} + +function mapIncompleteReason( + event: ResponseIncompleteEvent, +): ChatCompletionChunk.Choice["finish_reason"] { + const reason = event.response.incomplete_details?.reason; + if (reason === "max_output_tokens") { + return "length"; + } + if (reason === "content_filter") { + return "content_filter"; + } + return "stop"; +} + +function upsertToolCallState( + state: ResponsesStreamState, + item: ResponseOutputItem, + outputIndex: number, +): ToolCallState { + const callId = + (item as any).call_id ?? item.id ?? `tool_call_${state.toolCalls.size}`; + const toolState: ToolCallState = { + id: item.id ?? callId, + callId, + index: outputIndex, + name: (item as any).name ?? undefined, + arguments: (item as any).arguments ?? "", + }; + state.toolCalls.set(item.id ?? callId, toolState); + state.indexToToolCallId.set(outputIndex, callId); + return toolState; +} + +function getToolCallState( + state: ResponsesStreamState, + itemId: string, + outputIndex: number, +): ToolCallState | undefined { + const existing = state.toolCalls.get(itemId); + if (existing) { + return existing; + } + const byIndex = state.indexToToolCallId.get(outputIndex); + if (!byIndex) { + return undefined; + } + return state.toolCalls.get(byIndex); +} + +export function fromResponsesChunk( + state: ResponsesStreamState, + event: ResponseStreamEvent, +): ChatCompletionChunk | undefined { + switch (event.type) { + case "response.created": { + state.context.id = event.response.id; + state.context.created = event.response.created_at; + if (event.response.model) { + state.context.model = event.response.model; + } + return undefined; + } + case "response.output_item.added": { + const item = event.item; + if (item.type === "message") { + state.messages.set(item.id, { content: "", refusal: null }); + } else if (item.type === "function_call") { + upsertToolCallState(state, item, event.output_index); + } + return undefined; + } + case "response.output_text.delta": { + const messageState = state.messages.get(event.item_id); + if (messageState) { + messageState.content += event.delta; + } + return buildChunk(state, { content: event.delta }); + } + case "response.reasoning_text.delta": { + return buildChunk(state, { + reasoning: { + content: [ + { + type: "reasoning_text", + text: event.delta, + }, + ], + }, + } as any); + } + case "response.reasoning_summary_text.delta": { + const summaryEvent = event as ResponseReasoningSummaryTextDeltaEvent; + return buildChunk(state, { + reasoning: { + content: [ + { + type: "reasoning_text", + text: summaryEvent.delta, + }, + ], + }, + } as any); + } + case "response.refusal.delta": { + const messageState = state.messages.get(event.item_id); + if (messageState) { + messageState.refusal = (messageState.refusal ?? 
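+ // Start accumulation from an empty string on the first refusal delta.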
"") + event.delta; + } + return buildChunk(state, { refusal: event.delta }); + } + case "response.function_call_arguments.delta": { + const callState = getToolCallState( + state, + event.item_id, + event.output_index, + ); + if (!callState) { + return undefined; + } + callState.arguments += event.delta; + return buildChunk(state, { + tool_calls: [ + { + index: callState.index, + id: callState.callId, + type: "function", + function: { + name: callState.name, + arguments: event.delta, + }, + }, + ], + }); + } + case "response.function_call_arguments.done": { + const doneEvent = event as ResponseFunctionCallArgumentsDoneEvent; + const callState = getToolCallState( + state, + doneEvent.item_id, + doneEvent.output_index, + ); + if (callState) { + callState.arguments = doneEvent.arguments; + } + return undefined; + } + case "response.output_item.done": { + if (event.item.type === "function_call") { + return buildChunk(state, {}, "tool_calls"); + } + if (event.item.type === "message") { + return buildChunk(state, {}, state.context.pendingFinish ?? "stop"); + } + return undefined; + } + case "response.completed": { + state.context.id = event.response.id; + state.context.created = event.response.created_at; + state.context.model = event.response.model ?? state.context.model; + const usage = mapUsage(event.response.usage); + if (usage) { + return buildChunk(state, {}, null, usage, { + includeChoices: false, + }); + } + return undefined; + } + case "response.incomplete": { + const reason = mapIncompleteReason(event as ResponseIncompleteEvent); + state.context.pendingFinish = reason; + const usage = mapUsage((event as ResponseIncompleteEvent).response.usage); + if (usage) { + return buildChunk(state, {}, null, usage, { + includeChoices: false, + }); + } + return buildChunk(state, {}, reason); + } + case "response.failed": + case "error": { + state.context.pendingFinish = "content_filter"; + return undefined; + } + default: + return undefined; + } +} + +export function responseToChatCompletion(response: Response): ChatCompletion { + const usage = mapUsage(response.usage); + let finishReason: ChatCompletionChunk.Choice["finish_reason"] = "stop"; + if (response.incomplete_details?.reason === "max_output_tokens") { + finishReason = "length"; + } else if (response.incomplete_details?.reason === "content_filter") { + finishReason = "content_filter"; + } + + const messageContent: string[] = []; + let refusal: string | null = null; + const toolCalls: ChatCompletion["choices"][0]["message"]["tool_calls"] = []; + + response.output.forEach((item) => { + if (item.type === "message") { + item.content.forEach((contentPart) => { + if (contentPart.type === "output_text") { + messageContent.push(contentPart.text); + } else if (contentPart.type === "refusal") { + refusal = (refusal ?? "") + contentPart.refusal; + } + }); + } else if (item.type === "function_call") { + toolCalls.push({ + id: item.call_id ?? item.id, + type: "function", + function: { + name: item.name, + arguments: item.arguments, + }, + }); + } + }); + + if (toolCalls.length > 0) { + finishReason = "tool_calls"; + } + + const message = { + role: "assistant" as const, + content: messageContent.length ? messageContent.join("") : null, + refusal, + tool_calls: toolCalls.length ? 
toolCalls : undefined, + }; + + const chatCompletion: ChatCompletion = { + id: response.id, + object: "chat.completion", + created: response.created_at, + model: response.model, + choices: [ + { + index: 0, + message, + finish_reason: finishReason, + logprobs: null, + }, + ], + }; + + if (usage) { + chatCompletion.usage = usage; + } + + return chatCompletion; +} diff --git a/packages/openai-adapters/src/index.ts b/packages/openai-adapters/src/index.ts index 29121dab8eb..a7ee579f9f6 100644 --- a/packages/openai-adapters/src/index.ts +++ b/packages/openai-adapters/src/index.ts @@ -188,3 +188,5 @@ export { getAnthropicHeaders, getAnthropicMediaTypeFromDataUrl, } from "./apis/AnthropicUtils.js"; + +export { isResponsesModel } from "./apis/openaiResponses.js"; diff --git a/packages/openai-adapters/src/test/openai-responses.vitest.ts b/packages/openai-adapters/src/test/openai-responses.vitest.ts new file mode 100644 index 00000000000..68173da898b --- /dev/null +++ b/packages/openai-adapters/src/test/openai-responses.vitest.ts @@ -0,0 +1,487 @@ +import { describe, expect, it } from "vitest"; + +import type { ChatCompletionChunk } from "openai/resources/index.js"; +import type { + ChatCompletionAssistantMessageParam, + ChatCompletionMessageParam, +} from "openai/resources/index.js"; +import type { + Response, + ResponseCompletedEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseReasoningTextDeltaEvent, + ResponseStreamEvent, + ResponseTextDeltaEvent, +} from "openai/resources/responses/responses.js"; + +import { + createResponsesStreamState, + fromResponsesChunk, + responseToChatCompletion, + toResponsesInput, +} from "../apis/openaiResponses.js"; + +describe("toResponsesInput", () => { + it("maps assistant text content to output_text with generated msg ids", () => { + const messages: ChatCompletionMessageParam[] = [ + { + role: "assistant", + content: "Hello there!", + }, + ]; + + const inputItems = toResponsesInput(messages); + + expect(inputItems).toHaveLength(1); + const assistant = inputItems[0] as any; + expect(assistant).toMatchObject({ + type: "message", + role: "assistant", + id: "msg_0000", + }); + expect(assistant.content).toMatchObject([ + { + type: "output_text", + text: "Hello there!", + }, + ]); + }); + + it("maps assistant refusal content to refusal output items", () => { + const messages: ChatCompletionMessageParam[] = [ + { + role: "assistant", + content: "", + refusal: "I must decline.", + } as ChatCompletionAssistantMessageParam, + ]; + + const inputItems = toResponsesInput(messages); + + expect(inputItems).toHaveLength(1); + const assistant = inputItems[0] as any; + expect(assistant.content).toEqual([ + { + type: "refusal", + refusal: "I must decline.", + }, + ]); + }); + + it("converts assistant structured content into output_text items", () => { + const messages: ChatCompletionMessageParam[] = [ + { + role: "assistant", + content: [{ type: "text", text: "Structured hello." }], + } as ChatCompletionAssistantMessageParam, + ]; + + const inputItems = toResponsesInput(messages); + + const assistant = inputItems[0] as any; + expect(assistant.content).toMatchObject([ + { + type: "output_text", + text: "Structured hello.", + }, + ]); + }); + + it("converts chat messages, multimodal content, and tool interactions into Responses input items", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "system", content: "Stay concise." 
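+ // Expected to surface as a developer-role input item in the assertions below.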
}, + { + role: "user", + content: [ + { type: "text", text: "Look at this image" }, + { + type: "image_url", + image_url: { url: "https://example.com/cat.png", detail: "auto" }, + }, + ], + }, + { + role: "assistant", + tool_calls: [ + { + id: "fc_call_1", + type: "function", + function: { + name: "searchDocs", + arguments: '{"query":"vitest expectations"}', + }, + }, + ], + content: "", + } as ChatCompletionAssistantMessageParam, + { + role: "tool", + tool_call_id: "call_1", + content: "Found 3 relevant documents.", + }, + ]; + + const inputItems = toResponsesInput(messages); + + expect(inputItems).toMatchObject([ + { + type: "message", + role: "developer", + content: [{ type: "input_text", text: "Stay concise." }], + }, + { + type: "message", + role: "user", + content: [ + { type: "input_text", text: "Look at this image" }, + { + type: "input_image", + image_url: "https://example.com/cat.png", + detail: "auto", + }, + ], + }, + { + type: "function_call", + call_id: "fc_call_1", + id: "fc_call_1", + name: "searchDocs", + arguments: '{"query":"vitest expectations"}', + }, + { + type: "function_call_output", + call_id: "call_1", + output: "Found 3 relevant documents.", + }, + ]); + }); +}); + +it("omits function_call id when tool call id lacks fc_ prefix", () => { + const messages: ChatCompletionMessageParam[] = [ + { + role: "assistant", + tool_calls: [ + { + id: "call_custom", + type: "function", + function: { + name: "lookup", + arguments: "{}", + }, + }, + ], + content: "", + } as ChatCompletionAssistantMessageParam, + ]; + + const inputItems = toResponsesInput(messages); + const functionCall = inputItems.find( + (item: any) => item.type === "function_call", + ) as any; + + expect(functionCall).toBeTruthy(); + expect(functionCall.call_id).toBe("call_custom"); + expect(functionCall).not.toHaveProperty("id"); +}); + +describe("fromResponsesChunk", () => { + function collectChunks(events: ResponseStreamEvent[]): ChatCompletionChunk[] { + const state = createResponsesStreamState({ + created: 1710000000, + model: "gpt-5-preview", + responseId: "resp_123", + }); + + const chunks: ChatCompletionChunk[] = []; + for (const event of events) { + const result = fromResponsesChunk(state, event); + if (result) { + chunks.push(result); + } + } + return chunks; + } + + it("emits incremental assistant content and finish_reason from Responses text deltas", () => { + const messageAdded: ResponseOutputItemAddedEvent = { + type: "response.output_item.added", + output_index: 0, + sequence_number: 1, + item: { + id: "msg_1", + type: "message", + role: "assistant", + content: [], + } as any, + }; + const firstDelta: ResponseTextDeltaEvent = { + type: "response.output_text.delta", + sequence_number: 2, + item_id: "msg_1", + output_index: 0, + content_index: 0, + delta: "Hello", + logprobs: [], + }; + const secondDelta: ResponseTextDeltaEvent = { + type: "response.output_text.delta", + sequence_number: 3, + item_id: "msg_1", + output_index: 0, + content_index: 0, + delta: " world", + logprobs: [], + }; + const messageDone: ResponseOutputItemDoneEvent = { + type: "response.output_item.done", + sequence_number: 4, + output_index: 0, + item: { + id: "msg_1", + type: "message", + role: "assistant", + content: [ + { + type: "output_text", + text: "Hello world", + }, + ], + } as any, + }; + const completed: ResponseCompletedEvent = { + type: "response.completed", + sequence_number: 5, + response: { + id: "resp_123", + object: "response", + model: "gpt-5-preview", + created_at: 1710000000, + output_text: "Hello world", 
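+ // Null/empty fields below are stubs to satisfy the Response type; usage is asserted.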
+ error: null, + incomplete_details: null, + instructions: null, + metadata: null, + output: [], + parallel_tool_calls: false, + temperature: null, + tool_choice: null as any, + tools: [], + usage: { + input_tokens: 12, + input_tokens_details: { cached_tokens: 0 }, + output_tokens: 9, + output_tokens_details: { reasoning_tokens: 0 }, + total_tokens: 21, + }, + } as unknown as Response, + }; + + const chunks = collectChunks([ + messageAdded, + firstDelta, + secondDelta, + messageDone, + completed, + ]); + + expect(chunks[0].choices[0].delta.content).toBe("Hello"); + expect(chunks[1].choices[0].delta.content).toBe(" world"); + const finishChunk = chunks.find( + (chunk) => chunk.choices[0].finish_reason !== null, + ); + expect(finishChunk?.choices[0].finish_reason).toBe("stop"); + const usageChunk = chunks[chunks.length - 1]; + expect(usageChunk.usage).toMatchObject({ + prompt_tokens: 12, + completion_tokens: 9, + total_tokens: 21, + }); + }); + + it("tracks streaming tool call arguments and surfaces tool_calls deltas", () => { + const toolAdded: ResponseOutputItemAddedEvent = { + type: "response.output_item.added", + sequence_number: 1, + output_index: 0, + item: { + id: "tool_item_1", + type: "function_call", + call_id: "call_99", + name: "searchDocs", + arguments: "", + status: "in_progress", + } as any, + }; + const toolDeltaA: ResponseFunctionCallArgumentsDeltaEvent = { + type: "response.function_call_arguments.delta", + sequence_number: 2, + item_id: "tool_item_1", + output_index: 0, + delta: '{"query":"vit', + }; + const toolDeltaB: ResponseFunctionCallArgumentsDeltaEvent = { + type: "response.function_call_arguments.delta", + sequence_number: 3, + item_id: "tool_item_1", + output_index: 0, + delta: 'est"}', + }; + const toolDone: ResponseFunctionCallArgumentsDoneEvent = { + type: "response.function_call_arguments.done", + sequence_number: 4, + item_id: "tool_item_1", + output_index: 0, + arguments: '{"query":"vitest"}', + }; + const toolOutputDone: ResponseOutputItemDoneEvent = { + type: "response.output_item.done", + sequence_number: 5, + output_index: 0, + item: { + id: "tool_item_1", + type: "function_call", + call_id: "call_99", + name: "searchDocs", + arguments: '{"query":"vitest"}', + status: "completed", + } as any, + }; + + const chunks = collectChunks([ + toolAdded, + toolDeltaA, + toolDeltaB, + toolDone, + toolOutputDone, + ]); + + expect(chunks[0].choices[0].delta.tool_calls?.[0].function?.arguments).toBe( + '{"query":"vit', + ); + expect(chunks[1].choices[0].delta.tool_calls?.[0].function?.arguments).toBe( + 'est"}', + ); + const toolFinish = chunks[chunks.length - 1]; + expect(toolFinish.choices[0].finish_reason).toBe("tool_calls"); + }); + + it("emits reasoning deltas when reasoning items stream", () => { + const reasoningAdded: ResponseOutputItemAddedEvent = { + type: "response.output_item.added", + sequence_number: 1, + output_index: 0, + item: { + id: "reason_1", + type: "reasoning", + summary: [], + content: [], + } as any, + }; + const reasoningDelta: ResponseReasoningTextDeltaEvent = { + type: "response.reasoning_text.delta", + sequence_number: 2, + item_id: "reason_1", + output_index: 0, + content_index: 0, + delta: "First, inspect the repository structure.", + }; + + const chunks = collectChunks([reasoningAdded, reasoningDelta]); + expect(chunks).toHaveLength(1); + expect(chunks[0].choices[0].delta).toMatchObject({ + reasoning: { + content: [ + { + type: "reasoning_text", + text: "First, inspect the repository structure.", + }, + ], + }, + }); + }); +}); + 
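+// Options-mapping sketch, assuming toResponsesParams is added to the import
+// list above: max_completion_tokens (or max_tokens) becomes max_output_tokens,
+// reasoning_effort becomes reasoning.effort, and parallel_tool_calls defaults
+// to false whenever tools are present.
+describe("toResponsesParams", () => {
+  it("maps token limits, reasoning effort, and tool parallelism", () => {
+    const params = toResponsesParams({
+      model: "gpt-5",
+      messages: [{ role: "user", content: "hi" }],
+      max_completion_tokens: 256,
+      reasoning_effort: "low",
+      tools: [
+        {
+          type: "function",
+          function: { name: "lookup", parameters: { type: "object" } },
+        },
+      ],
+    });
+
+    expect(params.max_output_tokens).toBe(256);
+    expect(params.reasoning).toEqual({ effort: "low" });
+    expect(params.parallel_tool_calls).toBe(false);
+    expect(params.stream).toBe(false);
+  });
+});
+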
+describe("responseToChatCompletion", () => { + it("converts a completed Responses payload into a ChatCompletion summary", () => { + const response = { + id: "resp_final", + object: "response", + model: "gpt-5-mini", + created_at: 1710000001, + output_text: "Tool call required.", + error: null, + incomplete_details: null, + instructions: null, + metadata: null, + parallel_tool_calls: false, + temperature: null, + tool_choice: null, + tools: [], + usage: { + input_tokens: 100, + input_tokens_details: { cached_tokens: 4 }, + output_tokens: 42, + output_tokens_details: { reasoning_tokens: 10 }, + total_tokens: 142, + }, + output: [ + { + id: "reason_final", + type: "reasoning", + summary: [], + content: [ + { + type: "reasoning_text", + text: "Identify missing unit tests first.", + }, + ], + }, + { + id: "tool_item_final", + type: "function_call", + call_id: "call_final", + name: "searchDocs", + arguments: '{"query":"unit tests"}', + status: "completed", + }, + { + id: "msg_final", + type: "message", + role: "assistant", + content: [ + { + type: "output_text", + text: "Triggering searchDocs tool with the provided query.", + }, + ], + }, + ], + } as unknown as Response; + + const result = responseToChatCompletion(response); + + expect(result.choices[0].message.content).toBe( + "Triggering searchDocs tool with the provided query.", + ); + expect(result.choices[0].message.tool_calls).toEqual([ + { + id: "call_final", + type: "function", + function: { + name: "searchDocs", + arguments: '{"query":"unit tests"}', + }, + }, + ]); + expect(result.choices[0].finish_reason).toBe("tool_calls"); + expect(result.usage).toEqual({ + prompt_tokens: 100, + completion_tokens: 42, + total_tokens: 142, + }); + }); +});
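+
+// Routing-predicate sketch, assuming isResponsesModel is imported from
+// "../apis/openaiResponses.js" alongside the helpers above: names matching
+// RESPONSES_MODEL_REGEX are steered to the Responses API.
+describe("isResponsesModel", () => {
+  it("matches gpt-5 and o-series names but not other chat models", () => {
+    expect(isResponsesModel("gpt-5")).toBe(true);
+    expect(isResponsesModel("gpt-5-codex")).toBe(true);
+    expect(isResponsesModel("o3-mini")).toBe(true);
+    expect(isResponsesModel("gpt-4o")).toBe(false);
+    expect(isResponsesModel("claude-3-5-sonnet")).toBe(false);
+  });
+});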