Commit 736344b

Merge pull request #465 from easyops-cn/steve/perf
fix: limit the number of terms, closes #312
2 parents 50f93f6 + c889047

2 files changed: +44 −18 lines


docusaurus-search-local/src/client/utils/smartQueries.ts

Lines changed: 0 additions & 11 deletions

@@ -81,17 +81,6 @@ export function smartQueries(
     refinedTerms = terms.slice();
   }
 
-  const MAX_TERMS = 10;
-  if (refinedTerms.length > MAX_TERMS) {
-    // Sort terms by length in ascending order.,
-    // And keep the top 10 terms.
-    refinedTerms.sort((a, b) => a.length - b.length);
-    refinedTerms.splice(MAX_TERMS, refinedTerms.length - MAX_TERMS);
-
-    terms.sort((a, b) => a.length - b.length);
-    terms.splice(MAX_TERMS, terms.length - MAX_TERMS);
-  }
-
   // Also try to add extra terms which miss one of the searched tokens,
   // when the term contains 3 or more tokens,
   // to improve the search precision.
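
The block removed above trimmed the term lists only after smartTerms had already generated every combination, so a query whose tokens each cut into many candidate words could still build a large intermediate array before the cap applied (the performance issue this merge closes, #312). As a rough illustration only (combinationCount is a hypothetical helper, not part of the codebase), the number of generated combinations is the product of the per-token alternative counts:

// Hypothetical helper, for illustration only: the number of term
// combinations equals the product of the alternatives per token.
function combinationCount(alternativesPerToken: number[]): number {
  return alternativesPerToken.reduce((product, n) => product * n, 1);
}

// e.g. four Chinese tokens that each cut into 5 candidate words:
console.log(combinationCount([5, 5, 5, 5])); // 625 combinations built before the old cap of 10 applied

The change to smartTerms.ts below moves the limit into term generation itself, so the number of combinations is bounded before they are materialized.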

docusaurus-search-local/src/client/utils/smartTerms.ts

Lines changed: 44 additions & 7 deletions

@@ -1,6 +1,9 @@
 import { SmartTerm } from "../../shared/interfaces";
 import { cutZhWords } from "./cutZhWords";
 
+const MAX_TERMS = 12;
+const HALF_MAX_TERMS = MAX_TERMS / 2;
+
 /**
  * Get all possible terms for a list of tokens consists of words mixed in Chinese and non-Chinese,
  * by a Chinese words dictionary.
@@ -14,19 +17,53 @@ export function smartTerms(
   tokens: string[],
   zhDictionary: string[]
 ): SmartTerm[] {
-  const tokenTerms = tokens.map((token) => {
-    if (/\p{Unified_Ideograph}/u.test(token)) {
-      return cutZhWords(token, zhDictionary);
+  const tokenTerms = tokens
+    .map((token) => {
+      if (/\p{Unified_Ideograph}/u.test(token)) {
+        return cutZhWords(token, zhDictionary);
+      } else {
+        return [{ value: token }];
+      }
+    })
+    .slice(0, MAX_TERMS);
+
+  const tokenTermsThatAreMultiple = tokenTerms.filter(
+    (tokenTerm) => tokenTerm.length > 1
+  );
+
+  let termsProduct = 1;
+  let overflowed = false;
+
+  for (const tokenTerm of tokenTermsThatAreMultiple) {
+    if (overflowed) {
+      tokenTerm.splice(1, tokenTerm.length - 1);
     } else {
-      return [{ value: token }];
+      if (tokenTerm.length > HALF_MAX_TERMS) {
+        tokenTerm.splice(HALF_MAX_TERMS, tokenTerm.length - HALF_MAX_TERMS);
+      }
+      const product = termsProduct * tokenTerm.length;
+      if (product >= MAX_TERMS) {
+        if (product > MAX_TERMS) {
+          const max = Math.floor(MAX_TERMS / termsProduct);
+          tokenTerm.splice(max, tokenTerm.length - max);
+          termsProduct = max * termsProduct;
+        } else {
+          termsProduct = product;
+        }
+        if (termsProduct > HALF_MAX_TERMS) {
+          overflowed = true;
+        }
+      } else {
+        termsProduct = product;
+      }
     }
-  });
+  }
 
   // Get all possible combinations of terms.
   const terms: SmartTerm[] = [];
   function combine(index: number, carry: SmartTerm): void {
-    if (index === tokenTerms.length) {
-      terms.push(carry);
+    if (index === tokenTerms.length || carry.length >= MAX_TERMS) {
+      terms.push(carry.slice(0, MAX_TERMS));
       return;
     }
     for (const term of tokenTerms[index]) {
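
For reference, here is a minimal, self-contained sketch of the capping strategy added above, applied to plain string arrays rather than the plugin's SmartTerm values (capAlternatives is a hypothetical name, not part of the plugin's API). It mirrors the same rules: tokens are visited in order, no single token keeps more than HALF_MAX_TERMS alternatives, the running product of alternatives is trimmed so it never exceeds MAX_TERMS, and once that product exceeds HALF_MAX_TERMS every later multi-alternative token collapses to its first alternative.

// Standalone sketch of the capping loop (hypothetical helper, for illustration only).
const MAX_TERMS = 12;
const HALF_MAX_TERMS = MAX_TERMS / 2;

function capAlternatives(tokenTerms: string[][]): string[][] {
  let termsProduct = 1;
  let overflowed = false;
  for (const alternatives of tokenTerms) {
    if (alternatives.length <= 1) {
      continue; // single-alternative tokens don't grow the product
    }
    if (overflowed) {
      // Budget already spent: keep only the first alternative.
      alternatives.splice(1, alternatives.length - 1);
      continue;
    }
    // No single token may contribute more than half of the budget.
    if (alternatives.length > HALF_MAX_TERMS) {
      alternatives.splice(HALF_MAX_TERMS, alternatives.length - HALF_MAX_TERMS);
    }
    const product = termsProduct * alternatives.length;
    if (product >= MAX_TERMS) {
      if (product > MAX_TERMS) {
        // Trim this token so the running product fits within MAX_TERMS.
        const max = Math.floor(MAX_TERMS / termsProduct);
        alternatives.splice(max, alternatives.length - max);
        termsProduct = max * termsProduct;
      } else {
        termsProduct = product;
      }
      if (termsProduct > HALF_MAX_TERMS) {
        overflowed = true;
      }
    } else {
      termsProduct = product;
    }
  }
  return tokenTerms;
}

// Example: 4 × 5 alternatives would be 20 combinations; after capping,
// the second token is trimmed to 3, so the product is 4 × 3 = 12 ≤ MAX_TERMS.
console.log(capAlternatives([
  ["a1", "a2", "a3", "a4"],
  ["b1", "b2", "b3", "b4", "b5"],
]));

The guard added to combine (carry.length >= MAX_TERMS) additionally short-circuits the recursion once a single term has accumulated MAX_TERMS entries.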
