feat: add token usage tracking to OpenAI adapter (#7900)

sestinj · claude · web-flow · commit 0a5acb1f3269 · 2025-10-12T16:52:37.000-07:00
* feat: add token usage tracking to OpenAI adapter - Modified OpenAI adapter to properly handle and emit usage chunks in streaming responses - Added logic to store usage chunks and emit them at the end of the stream - Verified Anthropic and Gemini adapters already have complete token usage implementations - Added comprehensive tests for token usage tracking across all three providers - All tests passing with provided API keys 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * fix: tests --------- Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/packages/openai-adapters/src/apis/OpenAI.ts b/packages/openai-adapters/src/apis/OpenAI.ts
@@ -117,8 +117,19 @@ export class OpenAIApi implements BaseLlmApi {
         signal,
       },
     );
+    let lastChunkWithUsage: ChatCompletionChunk | undefined;
     for await (const result of response) {
-      yield result;
+      // Check if this chunk contains usage information
+      if (result.usage) {
+        // Store it to emit after all content chunks
+        lastChunkWithUsage = result;
+      } else {
+        yield result;
+      }
+    }
+    // Emit the usage chunk at the end if we have one
+    if (lastChunkWithUsage) {
+      yield lastChunkWithUsage;
     }
   }
   async completionNonStream(
diff --git a/packages/openai-adapters/src/test/util.ts b/packages/openai-adapters/src/test/util.ts
@@ -207,6 +207,23 @@ export function testChat(
     const completion = response.choices[0].message.content;
     expect(typeof completion).toBe("string");
     expect(completion?.length).toBeGreaterThan(0);
+
+    if (options?.expectUsage === true) {
+      expect(response.usage).toBeDefined();
+      expect(response.usage!.completion_tokens).toBeGreaterThan(0);
+      expect(response.usage!.prompt_tokens).toBeGreaterThan(0);
+      // Gemini 2.5 models have thinking tokens, so total_tokens >= prompt + completion
+      // Other models should have total_tokens = prompt + completion
+      if (model.includes("gemini-2.5") || model.includes("gemini-2.0")) {
+        expect(response.usage!.total_tokens).toBeGreaterThanOrEqual(
+          response.usage!.prompt_tokens + response.usage!.completion_tokens,
+        );
+      } else {
+        expect(response.usage!.total_tokens).toEqual(
+          response.usage!.prompt_tokens + response.usage!.completion_tokens,
+        );
+      }
+    }
   });
 
   test("should acknowledge system message in chat", async () => {