Skip to content

Commit 8b20dcb

Browse files
committed
Adding AI Token Counter
1 parent c57556f commit 8b20dcb

File tree

3 files changed

+107
-0
lines changed

3 files changed

+107
-0
lines changed

package-lock.json

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@
132132
"file-saver": "^2.0.5",
133133
"flat": "^6.0.1",
134134
"geodesy": "1.1.3",
135+
"gpt-tokenizer": "^2.9.0",
135136
"handlebars": "^4.7.8",
136137
"hash-wasm": "^4.12.0",
137138
"highlight.js": "^11.9.0",
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/**
2+
* @author grmartin [grmartin]
3+
* @copyright Crown Copyright 2016
4+
* @license Apache-2.0
5+
*/
6+
7+
import Operation from "../Operation.mjs";
8+
9+
// This mapping returns a Promise that resolves to the correct countTokens function for the model.
// Imports are lazy (dynamic) so the tokenizer data for a model is only loaded when selected,
// and the import specifiers are kept as string literals so bundlers can statically analyse them.

/**
 * Wraps a dynamic gpt-tokenizer module import so it lazily resolves to that
 * module's countTokens function.
 *
 * @param {Function} load - Thunk returning the module import Promise
 * @returns {Function} Thunk returning a Promise<countTokens>
 */
const lazyCounter = (load) => () => load().then(m => m.countTokens);

const MODEL_TO_COUNT_TOKENS = {
    // cl100k_base models
    "gpt-4": lazyCounter(() => import("gpt-tokenizer/model/gpt-4")),
    "gpt-4-32k": lazyCounter(() => import("gpt-tokenizer/model/gpt-4")),
    "gpt-4-turbo": lazyCounter(() => import("gpt-tokenizer/model/gpt-4")),
    "gpt-4-0125-preview": lazyCounter(() => import("gpt-tokenizer/model/gpt-4")),
    "gpt-4-1106-preview": lazyCounter(() => import("gpt-tokenizer/model/gpt-4")),
    "gpt-3.5-turbo": lazyCounter(() => import("gpt-tokenizer/model/gpt-3.5-turbo")),
    "gpt-3.5-turbo-16k": lazyCounter(() => import("gpt-tokenizer/model/gpt-3.5-turbo")),
    "gpt-3.5-turbo-instruct": lazyCounter(() => import("gpt-tokenizer/model/gpt-3.5-turbo")),
    "gpt-3.5-turbo-0125": lazyCounter(() => import("gpt-tokenizer/model/gpt-3.5-turbo")),
    "gpt-3.5-turbo-1106": lazyCounter(() => import("gpt-tokenizer/model/gpt-3.5-turbo")),
    "text-embedding-ada-002": lazyCounter(() => import("gpt-tokenizer/model/text-embedding-ada-002")),
    "text-embedding-3-large": lazyCounter(() => import("gpt-tokenizer/model/text-embedding-3-large")),
    "text-embedding-3-small": lazyCounter(() => import("gpt-tokenizer/model/text-embedding-3-small")),

    // o200k_base models
    // NOTE: GPT-4o uses the o200k_base encoding, NOT cl100k_base — routing it through
    // the gpt-3.5-turbo module would produce incorrect token counts.
    "gpt-4o": lazyCounter(() => import("gpt-tokenizer/model/gpt-4o")),

    // p50k_base models
    "code-davinci-002": lazyCounter(() => import("gpt-tokenizer/model/code-davinci-002")),
    "code-davinci-001": lazyCounter(() => import("gpt-tokenizer/model/code-davinci-002")),
    "code-cushman-002": lazyCounter(() => import("gpt-tokenizer/model/code-cushman-002")),
    "code-cushman-001": lazyCounter(() => import("gpt-tokenizer/model/code-cushman-002")),
    "text-davinci-002": lazyCounter(() => import("gpt-tokenizer/model/text-davinci-002")),
    "text-davinci-003": lazyCounter(() => import("gpt-tokenizer/model/text-davinci-003")),

    // p50k_edit models
    "text-davinci-edit-001": lazyCounter(() => import("gpt-tokenizer/model/text-davinci-edit-001")),
    "code-davinci-edit-001": lazyCounter(() => import("gpt-tokenizer/model/code-davinci-edit-001")),

    // r50k_base models
    "davinci": lazyCounter(() => import("gpt-tokenizer/model/davinci")),
    "curie": lazyCounter(() => import("gpt-tokenizer/model/curie")),
    "babbage": lazyCounter(() => import("gpt-tokenizer/model/babbage")),
    "ada": lazyCounter(() => import("gpt-tokenizer/model/ada")),
};
45+
46+
47+
/**
 * Count AI Tokens operation
 */
class CountAITokens extends Operation {

    /**
     * CountAITokens constructor
     */
    constructor() {
        super();

        this.name = "Count AI Tokens";
        this.module = "AI";
        this.infoURL = "https://github.com/niieani/gpt-tokenizer";
        this.description = "Counts the number of GPT tokens in the input text using niieani/gpt-tokenizer. Select the model to use the correct encoding.";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [
            {
                // Model choices come straight from the mapping so the dropdown
                // and the tokenizer lookup can never drift apart.
                name: "Model",
                type: "option",
                value: Object.keys(MODEL_TO_COUNT_TOKENS),
            }
        ];
    }

    /**
     * Counts the tokens in the input using the encoding for the selected model.
     *
     * @param {string} input - The text to tokenize
     * @param {Object[]} args - [0] the model name selected in the "Model" option
     * @returns {string} The token count, rendered as a decimal string
     */
    async run(input, args) {
        if (!input) return "";

        const [model] = args;
        // Unknown models fall back to the gpt-3.5-turbo (cl100k_base) encoding.
        const loadCounter = MODEL_TO_COUNT_TOKENS[model] ||
            (() => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens));
        const countTokens = await loadCounter();
        return countTokens(input).toString();
    }

}
98+
99+
export default CountAITokens;
100+

0 commit comments

Comments
 (0)