Skip to content

Commit c6a88ee

Browse files
committed
Use an even shorter expression for the typed array size.
It was realized that the array size doesn't have to be exact. We keep at most two significant digits of the number of contexts (rounding up), so the decoder might use up to 10% more memory than strictly needed (okay) and could thereby end up using more memory than requested (not okay). The contextBits calculation has been adjusted to avoid this issue.
1 parent eb74630 commit c6a88ee

File tree

3 files changed

+35
-15
lines changed

3 files changed

+35
-15
lines changed

cli.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ async function compress({ inputs, options, optimize, outputPath, verbose }) {
309309

310310
if (verbose >= 1) {
311311
console.warn(
312-
`Actual memory usage: ${packer.memoryUsageMB < 1 ? '< 1' : packer.memoryUsageMB} MB` +
312+
`Actual memory usage: ${packer.memoryUsageMB < 1 ? '< 1' : packer.memoryUsageMB.toFixed(1)} MB` +
313313
(options.contextBits ? '' : ` (out of ${options.maxMemoryMB || 150} MB)`));
314314
}
315315

index.mjs

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,25 @@ const ceilLog2 = (x, y = 1) => {
4444
return n;
4545
};
4646

47+
/**
 * Rounds a positive number up to at most two significant digits.
 *
 * Returns `[m, e, m * 10^e]` where `(m - 1) * 10^e < v <= m * 10^e`, `m < 100` and `m mod 10 != 0`.
 * Therefore `${m}e${e}` is an upper bound approximation of `v` with ~2 significant digits,
 * written with the shortest possible mantissa.
 *
 * @param {number} v - the value to approximate.
 * @returns {[number, number, number]} the mantissa, the decimal exponent and the approximated value;
 *     `[0, 0, 0]` when `v` is zero or negative.
 */
const approximateWithTwoSigDigits = v => {
    if (v <= 0) return [0, 0, 0]; // special case

    // find the largest power of ten that leaves at most two digits in front of the decimal point
    let exp = 0;
    let tens = 1;
    while (v >= tens * 100) {
        ++exp;
        tens *= 10;
    }

    // round up, so that the approximation never falls below v
    let mant = Math.ceil(v / tens);

    // move trailing zeros into the exponent: 60e6 -> 6e7.
    // this has to loop because rounding up can carry into a third digit: 995 -> 100e1 -> 10e2 -> 1e3.
    // mant is a positive integer here, so the loop always terminates.
    while (mant % 10 === 0) {
        mant /= 10;
        ++exp;
        tens *= 10;
    }

    return [mant, exp, mant * tens];
};
65+
4766
// Node.js 14 doesn't have a global performance object.
4867
const getPerformanceObject = async () => {
4968
return globalThis.performance || (await import('perf_hooks')).performance;
@@ -587,7 +606,14 @@ const countBytesPerContext = options => (options.modelMaxCount < 128 ? 1 : optio
587606

588607
const contextBitsFromMaxMemory = options => {
589608
const bytesPerContext = predictionBytesPerContext(options) + countBytesPerContext(options);
590-
return floorLog2(options.maxMemoryMB * 1048576, options.sparseSelectors.length * bytesPerContext);
609+
let contextBits = floorLog2(options.maxMemoryMB * 1048576, options.sparseSelectors.length * bytesPerContext);
610+
611+
// the decoder overallocates the memory by up to ~10% (the context count is rounded up to two significant digits) so a naive calculation can exceed the memory limit;
612+
// recalculate the actual memory usage and decrease contextBits in that case.
613+
const [, , actualNumContexts] = approximateWithTwoSigDigits(options.sparseSelectors.length << contextBits);
614+
if (actualNumContexts * bytesPerContext > options.maxMemoryMB * 1048576) --contextBits;
615+
616+
return contextBits;
591617
};
592618

593619
// String.fromCharCode(...array) is short but doesn't work when array.length is "long enough".
@@ -650,8 +676,9 @@ export class Packer {
650676

651677
get memoryUsageMB() {
652678
const contextBits = this.options.contextBits || contextBitsFromMaxMemory(this.options);
679+
const [, , numContexts] = approximateWithTwoSigDigits(this.options.sparseSelectors.length << contextBits);
653680
const bytesPerContext = predictionBytesPerContext(this.options) + countBytesPerContext(this.options);
654-
return this.options.sparseSelectors.length * bytesPerContext * (1 << contextBits) / 1048576;
681+
return numContexts * bytesPerContext / 1048576;
655682
}
656683

657684
static prepareText(inputs) {
@@ -959,16 +986,9 @@ export class Packer {
959986
return 'θ';
960987
};
961988

962-
let contextSize;
963-
{
964-
let v = numModels, shift = contextBits;
965-
while (v % 2 == 0) {
966-
v >>= 1;
967-
++shift;
968-
}
969-
contextSize = `${v}<<${shift}`;
970-
// we can also make use of θ, but that wouldn't work in the argument position
971-
}
989+
// only keep two significant digits, rounding up
990+
const [contextMant, contextExp] = approximateWithTwoSigDigits(numModels << contextBits);
991+
const contextSize = `${contextMant}e${contextExp}`;
972992

973993
// 0. first line
974994
// ι: compressed data, where lowest 6 bits are used and higher bits are chosen to avoid escape sequences.

tools/demo.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@
104104
<aside id=$outputmessage></aside>
105105
<footer><ul>
106106
<li><label>Number of contexts: <input id=$numcontexts type=number value=12 min=1 max=64></label> <a href=#num-contexts title=Help>ℹ️</a>
107-
<li><label>Maximum memory usage: <input id=$maxmemory type=number value=150 min=10 max=1024> MB (<span id=$usedmemory>96</span> MB in use)</label> <a href=#max-memory title=Help>ℹ️</a>
107+
<li><label>Maximum memory usage: <input id=$maxmemory type=number value=150 min=10 max=1024> MB (<span id=$usedmemory>-</span> MB in use)</label> <a href=#max-memory title=Help>ℹ️</a>
108108
<li><label><input type=checkbox id=$dirty> Allow the decoder to pollute the global scope</label> <a href=#dirty title=Help>ℹ️</a>
109109
<li><button id=$reset>Reset parameters</button> <button id=$optimize>Optimize parameters</button> <a href=#optimize title=Help>ℹ️</a>
110110
</ul><details><summary>Advanced configuration</summary><ul>
@@ -415,7 +415,7 @@ <h2>Command-line Usage and API</h2>
415415

416416
let prefix = '';
417417
const packer = new Packer(inputs, options);
418-
$usedmemory.textContent = packer.memoryUsageMB;
418+
$usedmemory.textContent = packer.memoryUsageMB.toFixed(1);
419419

420420
if (optimize) {
421421
if (lastOptimizationLevel < 2) ++lastOptimizationLevel;

0 commit comments

Comments
 (0)