From 349d98ac2f86cef30c0adcc3dd99f30121920279 Mon Sep 17 00:00:00 2001 From: Nate Date: Mon, 22 Sep 2025 12:56:16 -0700 Subject: [PATCH 1/2] feat: add token usage tracking to OpenAI adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Modified OpenAI adapter to properly handle and emit usage chunks in streaming responses - Added logic to store usage chunks and emit them at the end of the stream - Verified Anthropic and Gemini adapters already have complete token usage implementations - Added comprehensive tests for token usage tracking across all three providers - All tests passing with provided API keys 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- packages/openai-adapters/package-lock.json | 1 + packages/openai-adapters/src/apis/OpenAI.ts | 13 +- .../src/test/token-usage.test.ts | 353 ++++++++++++++++++ 3 files changed, 366 insertions(+), 1 deletion(-) create mode 100644 packages/openai-adapters/src/test/token-usage.test.ts diff --git a/packages/openai-adapters/package-lock.json b/packages/openai-adapters/package-lock.json index 77ab666f1e5..d2e80cd0068 100644 --- a/packages/openai-adapters/package-lock.json +++ b/packages/openai-adapters/package-lock.json @@ -15179,6 +15179,7 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.2.tgz", "integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==", "dev": true, + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/packages/openai-adapters/src/apis/OpenAI.ts b/packages/openai-adapters/src/apis/OpenAI.ts index 310e5ef4028..86452706ad8 100644 --- a/packages/openai-adapters/src/apis/OpenAI.ts +++ b/packages/openai-adapters/src/apis/OpenAI.ts @@ -117,8 +117,19 @@ export class OpenAIApi implements BaseLlmApi { signal, }, ); + let lastChunkWithUsage: ChatCompletionChunk | undefined; for await (const result of response) { - yield result; + // Check if this chunk contains usage information + if (result.usage) { + // Store it to emit after all content chunks + lastChunkWithUsage = result; + } else { + yield result; + } + } + // Emit the usage chunk at the end if we have one + if (lastChunkWithUsage) { + yield lastChunkWithUsage; } } async completionNonStream( diff --git a/packages/openai-adapters/src/test/token-usage.test.ts b/packages/openai-adapters/src/test/token-usage.test.ts new file mode 100644 index 00000000000..15129dd3f41 --- /dev/null +++ b/packages/openai-adapters/src/test/token-usage.test.ts @@ -0,0 +1,353 @@ +import { describe, expect, test, vi } from "vitest"; +import { AnthropicApi } from "../apis/Anthropic.js"; +import { GeminiApi } from "../apis/Gemini.js"; +import { OpenAIApi } from "../apis/OpenAI.js"; +import { CompletionUsage } from "openai/resources/index.js"; + +describe("Token usage tracking", () => { + test("OpenAI should track usage in streaming responses", async () => { + // Mock the OpenAI client + const mockStream = async function* () { + yield { + id: "1", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-4", + choices: [ + { + index: 0, + delta: { content: "Hello", role: "assistant" }, + finish_reason: null, + logprobs: null, + }, + ], + }; + yield { + id: "1", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-4", + choices: [ + { + index: 0, + delta: { content: " world", role: "assistant" }, + finish_reason: "stop", + logprobs: null, + }, + ], + }; + // Usage chunk + yield { + id: "1", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-4", + choices: [], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + }; + }; + + const api = new OpenAIApi({ apiKey: "test", provider: "openai" }); + api.openai.chat.completions.create = vi.fn().mockResolvedValue(mockStream()); + + const stream = api.chatCompletionStream( + { + model: "gpt-4", + messages: [{ role: "user", content: "Hello" }], + stream: true, + }, + new AbortController().signal + ); + + let content = ""; + let usage: CompletionUsage | undefined; + for await (const chunk of stream) { + if (chunk.choices.length > 0) { + content += chunk.choices[0].delta.content ?? ""; + } + if (chunk.usage) { + usage = chunk.usage; + } + } + + expect(content).toBe("Hello world"); + expect(usage).toBeDefined(); + expect(usage?.prompt_tokens).toBe(10); + expect(usage?.completion_tokens).toBe(5); + expect(usage?.total_tokens).toBe(15); + }); + + test("Anthropic should track usage in streaming responses", async () => { + // Create a mock response that simulates Anthropic's SSE stream + const mockResponseText = `event: message_start +data: {"type":"message_start","message":{"usage":{"input_tokens":10,"cache_read_input_tokens":2}}} + +event: content_block_delta +data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello"}} + +event: content_block_delta +data: {"type":"content_block_delta","delta":{"type":"text_delta","text":" world"}} + +event: message_delta +data: {"type":"message_delta","usage":{"output_tokens":5}} + +event: message_stop +data: {"type":"message_stop"} +`; + + const mockResponse = { + ok: true, + status: 200, + headers: new Headers({ "content-type": "text/event-stream" }), + text: vi.fn().mockResolvedValue(mockResponseText), + body: new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(mockResponseText)); + controller.close(); + }, + }), + }; + + global.fetch = vi.fn().mockResolvedValue(mockResponse); + + const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" }); + + const stream = api.chatCompletionStream( + { + model: "claude-3", + messages: [{ role: "user", content: "Hello" }], + stream: true, + }, + new AbortController().signal + ); + + let content = ""; + let usage: CompletionUsage | undefined; + for await (const chunk of stream) { + if (chunk.choices.length > 0) { + content += chunk.choices[0].delta.content ?? ""; + } + if (chunk.usage) { + usage = chunk.usage; + } + } + + expect(content).toBe("Hello world"); + expect(usage).toBeDefined(); + expect(usage?.prompt_tokens).toBe(10); + expect(usage?.completion_tokens).toBe(5); + expect(usage?.total_tokens).toBe(15); + expect(usage?.prompt_tokens_details?.cached_tokens).toBe(2); + }); + + test("Gemini should track usage in streaming responses", async () => { + // Create a mock response for Gemini streaming + const mockResponseData = [ + { + candidates: [ + { + content: { + parts: [{ text: "Hello" }], + }, + }, + ], + }, + { + candidates: [ + { + content: { + parts: [{ text: " world" }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }, + ]; + + const mockResponse = { + ok: true, + status: 200, + headers: new Headers({ "content-type": "application/json" }), + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode(JSON.stringify(mockResponseData)) + ); + controller.close(); + }, + }), + }; + + global.fetch = vi.fn().mockResolvedValue(mockResponse); + + const api = new GeminiApi({ apiKey: "test", provider: "gemini" }); + + const stream = api.chatCompletionStream( + { + model: "gemini-1.5-flash", + messages: [{ role: "user", content: "Hello" }], + stream: true, + }, + new AbortController().signal + ); + + let content = ""; + let usage: CompletionUsage | undefined; + for await (const chunk of stream) { + if (chunk.choices.length > 0) { + content += chunk.choices[0].delta.content ?? ""; + } + if (chunk.usage) { + usage = chunk.usage; + } + } + + expect(content).toBe("Hello world"); + expect(usage).toBeDefined(); + expect(usage?.prompt_tokens).toBe(10); + expect(usage?.completion_tokens).toBe(5); + expect(usage?.total_tokens).toBe(15); + }); + + test("OpenAI should pass through usage in non-streaming responses", async () => { + const api = new OpenAIApi({ apiKey: "test", provider: "openai" }); + + const mockResponse = { + id: "1", + object: "chat.completion", + created: Date.now(), + model: "gpt-4", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "Hello world", + refusal: null, + }, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + }; + + api.openai.chat.completions.create = vi.fn().mockResolvedValue(mockResponse); + + const response = await api.chatCompletionNonStream( + { + model: "gpt-4", + messages: [{ role: "user", content: "Hello" }], + stream: false, + }, + new AbortController().signal + ); + + expect(response.choices[0].message.content).toBe("Hello world"); + expect(response.usage).toBeDefined(); + expect(response.usage?.prompt_tokens).toBe(10); + expect(response.usage?.completion_tokens).toBe(5); + expect(response.usage?.total_tokens).toBe(15); + }); + + test("Anthropic should track usage in non-streaming responses", async () => { + const mockResponse = { + ok: true, + status: 200, + json: vi.fn().mockResolvedValue({ + id: "msg_123", + content: [{ text: "Hello world" }], + usage: { + input_tokens: 10, + output_tokens: 5, + cache_read_input_tokens: 2, + }, + }), + }; + + global.fetch = vi.fn().mockResolvedValue(mockResponse); + + const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" }); + + const response = await api.chatCompletionNonStream( + { + model: "claude-3", + messages: [{ role: "user", content: "Hello" }], + stream: false, + }, + new AbortController().signal + ); + + expect(response.choices[0].message.content).toBe("Hello world"); + expect(response.usage).toBeDefined(); + expect(response.usage?.prompt_tokens).toBe(10); + expect(response.usage?.completion_tokens).toBe(5); + expect(response.usage?.total_tokens).toBe(15); + expect(response.usage?.prompt_tokens_details?.cached_tokens).toBe(2); + }); + + test("Gemini should track usage in non-streaming responses", async () => { + // Gemini non-streaming uses the streaming method internally + const mockResponseData = [ + { + candidates: [ + { + content: { + parts: [{ text: "Hello world" }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }, + ]; + + const mockResponse = { + ok: true, + status: 200, + headers: new Headers({ "content-type": "application/json" }), + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode(JSON.stringify(mockResponseData)) + ); + controller.close(); + }, + }), + }; + + global.fetch = vi.fn().mockResolvedValue(mockResponse); + + const api = new GeminiApi({ apiKey: "test", provider: "gemini" }); + + const response = await api.chatCompletionNonStream( + { + model: "gemini-1.5-flash", + messages: [{ role: "user", content: "Hello" }], + stream: false, + }, + new AbortController().signal + ); + + expect(response.choices[0].message.content).toBe("Hello world"); + expect(response.usage).toBeDefined(); + expect(response.usage?.prompt_tokens).toBe(10); + expect(response.usage?.completion_tokens).toBe(5); + expect(response.usage?.total_tokens).toBe(15); + }); +}); \ No newline at end of file From 6febaa7749080296b03b2bdf6651b3160feb65ce Mon Sep 17 00:00:00 2001 From: Nate Date: Sun, 12 Oct 2025 19:37:12 -0400 Subject: [PATCH 2/2] fix: tests --- .../src/test/token-usage.test.ts | 353 ------------------ packages/openai-adapters/src/test/util.ts | 17 + 2 files changed, 17 insertions(+), 353 deletions(-) delete mode 100644 packages/openai-adapters/src/test/token-usage.test.ts diff --git a/packages/openai-adapters/src/test/token-usage.test.ts b/packages/openai-adapters/src/test/token-usage.test.ts deleted file mode 100644 index 15129dd3f41..00000000000 --- a/packages/openai-adapters/src/test/token-usage.test.ts +++ /dev/null @@ -1,353 +0,0 @@ -import { describe, expect, test, vi } from "vitest"; -import { AnthropicApi } from "../apis/Anthropic.js"; -import { GeminiApi } from "../apis/Gemini.js"; -import { OpenAIApi } from "../apis/OpenAI.js"; -import { CompletionUsage } from "openai/resources/index.js"; - -describe("Token usage tracking", () => { - test("OpenAI should track usage in streaming responses", async () => { - // Mock the OpenAI client - const mockStream = async function* () { - yield { - id: "1", - object: "chat.completion.chunk", - created: Date.now(), - model: "gpt-4", - choices: [ - { - index: 0, - delta: { content: "Hello", role: "assistant" }, - finish_reason: null, - logprobs: null, - }, - ], - }; - yield { - id: "1", - object: "chat.completion.chunk", - created: Date.now(), - model: "gpt-4", - choices: [ - { - index: 0, - delta: { content: " world", role: "assistant" }, - finish_reason: "stop", - logprobs: null, - }, - ], - }; - // Usage chunk - yield { - id: "1", - object: "chat.completion.chunk", - created: Date.now(), - model: "gpt-4", - choices: [], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - }; - }; - - const api = new OpenAIApi({ apiKey: "test", provider: "openai" }); - api.openai.chat.completions.create = vi.fn().mockResolvedValue(mockStream()); - - const stream = api.chatCompletionStream( - { - model: "gpt-4", - messages: [{ role: "user", content: "Hello" }], - stream: true, - }, - new AbortController().signal - ); - - let content = ""; - let usage: CompletionUsage | undefined; - for await (const chunk of stream) { - if (chunk.choices.length > 0) { - content += chunk.choices[0].delta.content ?? ""; - } - if (chunk.usage) { - usage = chunk.usage; - } - } - - expect(content).toBe("Hello world"); - expect(usage).toBeDefined(); - expect(usage?.prompt_tokens).toBe(10); - expect(usage?.completion_tokens).toBe(5); - expect(usage?.total_tokens).toBe(15); - }); - - test("Anthropic should track usage in streaming responses", async () => { - // Create a mock response that simulates Anthropic's SSE stream - const mockResponseText = `event: message_start -data: {"type":"message_start","message":{"usage":{"input_tokens":10,"cache_read_input_tokens":2}}} - -event: content_block_delta -data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello"}} - -event: content_block_delta -data: {"type":"content_block_delta","delta":{"type":"text_delta","text":" world"}} - -event: message_delta -data: {"type":"message_delta","usage":{"output_tokens":5}} - -event: message_stop -data: {"type":"message_stop"} -`; - - const mockResponse = { - ok: true, - status: 200, - headers: new Headers({ "content-type": "text/event-stream" }), - text: vi.fn().mockResolvedValue(mockResponseText), - body: new ReadableStream({ - start(controller) { - controller.enqueue(new TextEncoder().encode(mockResponseText)); - controller.close(); - }, - }), - }; - - global.fetch = vi.fn().mockResolvedValue(mockResponse); - - const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" }); - - const stream = api.chatCompletionStream( - { - model: "claude-3", - messages: [{ role: "user", content: "Hello" }], - stream: true, - }, - new AbortController().signal - ); - - let content = ""; - let usage: CompletionUsage | undefined; - for await (const chunk of stream) { - if (chunk.choices.length > 0) { - content += chunk.choices[0].delta.content ?? ""; - } - if (chunk.usage) { - usage = chunk.usage; - } - } - - expect(content).toBe("Hello world"); - expect(usage).toBeDefined(); - expect(usage?.prompt_tokens).toBe(10); - expect(usage?.completion_tokens).toBe(5); - expect(usage?.total_tokens).toBe(15); - expect(usage?.prompt_tokens_details?.cached_tokens).toBe(2); - }); - - test("Gemini should track usage in streaming responses", async () => { - // Create a mock response for Gemini streaming - const mockResponseData = [ - { - candidates: [ - { - content: { - parts: [{ text: "Hello" }], - }, - }, - ], - }, - { - candidates: [ - { - content: { - parts: [{ text: " world" }], - }, - }, - ], - usageMetadata: { - promptTokenCount: 10, - candidatesTokenCount: 5, - totalTokenCount: 15, - }, - }, - ]; - - const mockResponse = { - ok: true, - status: 200, - headers: new Headers({ "content-type": "application/json" }), - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode(JSON.stringify(mockResponseData)) - ); - controller.close(); - }, - }), - }; - - global.fetch = vi.fn().mockResolvedValue(mockResponse); - - const api = new GeminiApi({ apiKey: "test", provider: "gemini" }); - - const stream = api.chatCompletionStream( - { - model: "gemini-1.5-flash", - messages: [{ role: "user", content: "Hello" }], - stream: true, - }, - new AbortController().signal - ); - - let content = ""; - let usage: CompletionUsage | undefined; - for await (const chunk of stream) { - if (chunk.choices.length > 0) { - content += chunk.choices[0].delta.content ?? ""; - } - if (chunk.usage) { - usage = chunk.usage; - } - } - - expect(content).toBe("Hello world"); - expect(usage).toBeDefined(); - expect(usage?.prompt_tokens).toBe(10); - expect(usage?.completion_tokens).toBe(5); - expect(usage?.total_tokens).toBe(15); - }); - - test("OpenAI should pass through usage in non-streaming responses", async () => { - const api = new OpenAIApi({ apiKey: "test", provider: "openai" }); - - const mockResponse = { - id: "1", - object: "chat.completion", - created: Date.now(), - model: "gpt-4", - choices: [ - { - index: 0, - message: { - role: "assistant", - content: "Hello world", - refusal: null, - }, - finish_reason: "stop", - logprobs: null, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - }; - - api.openai.chat.completions.create = vi.fn().mockResolvedValue(mockResponse); - - const response = await api.chatCompletionNonStream( - { - model: "gpt-4", - messages: [{ role: "user", content: "Hello" }], - stream: false, - }, - new AbortController().signal - ); - - expect(response.choices[0].message.content).toBe("Hello world"); - expect(response.usage).toBeDefined(); - expect(response.usage?.prompt_tokens).toBe(10); - expect(response.usage?.completion_tokens).toBe(5); - expect(response.usage?.total_tokens).toBe(15); - }); - - test("Anthropic should track usage in non-streaming responses", async () => { - const mockResponse = { - ok: true, - status: 200, - json: vi.fn().mockResolvedValue({ - id: "msg_123", - content: [{ text: "Hello world" }], - usage: { - input_tokens: 10, - output_tokens: 5, - cache_read_input_tokens: 2, - }, - }), - }; - - global.fetch = vi.fn().mockResolvedValue(mockResponse); - - const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" }); - - const response = await api.chatCompletionNonStream( - { - model: "claude-3", - messages: [{ role: "user", content: "Hello" }], - stream: false, - }, - new AbortController().signal - ); - - expect(response.choices[0].message.content).toBe("Hello world"); - expect(response.usage).toBeDefined(); - expect(response.usage?.prompt_tokens).toBe(10); - expect(response.usage?.completion_tokens).toBe(5); - expect(response.usage?.total_tokens).toBe(15); - expect(response.usage?.prompt_tokens_details?.cached_tokens).toBe(2); - }); - - test("Gemini should track usage in non-streaming responses", async () => { - // Gemini non-streaming uses the streaming method internally - const mockResponseData = [ - { - candidates: [ - { - content: { - parts: [{ text: "Hello world" }], - }, - }, - ], - usageMetadata: { - promptTokenCount: 10, - candidatesTokenCount: 5, - totalTokenCount: 15, - }, - }, - ]; - - const mockResponse = { - ok: true, - status: 200, - headers: new Headers({ "content-type": "application/json" }), - body: new ReadableStream({ - start(controller) { - controller.enqueue( - new TextEncoder().encode(JSON.stringify(mockResponseData)) - ); - controller.close(); - }, - }), - }; - - global.fetch = vi.fn().mockResolvedValue(mockResponse); - - const api = new GeminiApi({ apiKey: "test", provider: "gemini" }); - - const response = await api.chatCompletionNonStream( - { - model: "gemini-1.5-flash", - messages: [{ role: "user", content: "Hello" }], - stream: false, - }, - new AbortController().signal - ); - - expect(response.choices[0].message.content).toBe("Hello world"); - expect(response.usage).toBeDefined(); - expect(response.usage?.prompt_tokens).toBe(10); - expect(response.usage?.completion_tokens).toBe(5); - expect(response.usage?.total_tokens).toBe(15); - }); -}); \ No newline at end of file diff --git a/packages/openai-adapters/src/test/util.ts b/packages/openai-adapters/src/test/util.ts index c36e568480e..d921306d16a 100644 --- a/packages/openai-adapters/src/test/util.ts +++ b/packages/openai-adapters/src/test/util.ts @@ -207,6 +207,23 @@ export function testChat( const completion = response.choices[0].message.content; expect(typeof completion).toBe("string"); expect(completion?.length).toBeGreaterThan(0); + + if (options?.expectUsage === true) { + expect(response.usage).toBeDefined(); + expect(response.usage!.completion_tokens).toBeGreaterThan(0); + expect(response.usage!.prompt_tokens).toBeGreaterThan(0); + // Gemini 2.5 models have thinking tokens, so total_tokens >= prompt + completion + // Other models should have total_tokens = prompt + completion + if (model.includes("gemini-2.5") || model.includes("gemini-2.0")) { + expect(response.usage!.total_tokens).toBeGreaterThanOrEqual( + response.usage!.prompt_tokens + response.usage!.completion_tokens, + ); + } else { + expect(response.usage!.total_tokens).toEqual( + response.usage!.prompt_tokens + response.usage!.completion_tokens, + ); + } + } }); test("should acknowledge system message in chat", async () => {