11import { SmartTerm } from "../../shared/interfaces" ;
22import { cutZhWords } from "./cutZhWords" ;
33
4+ const MAX_TERMS = 12 ;
5+ const HALF_MAX_TERMS = MAX_TERMS / 2 ;
6+
47/**
58 * Get all possible terms for a list of tokens consisting of a mix of Chinese and non-Chinese words,
69 * using a Chinese word dictionary.
@@ -14,19 +17,53 @@ export function smartTerms(
1417 tokens : string [ ] ,
1518 zhDictionary : string [ ]
1619) : SmartTerm [ ] {
17- const tokenTerms = tokens . map ( ( token ) => {
18- if ( / \p{ Unified_Ideograph} / u. test ( token ) ) {
19- return cutZhWords ( token , zhDictionary ) ;
20+ const tokenTerms = tokens
21+ . map ( ( token ) => {
22+ if ( / \p{ Unified_Ideograph} / u. test ( token ) ) {
23+ return cutZhWords ( token , zhDictionary ) ;
24+ } else {
25+ return [ { value : token } ] ;
26+ }
27+ } )
28+ . slice ( 0 , MAX_TERMS ) ;
29+
30+ const tokenTermsThatAreMultiple = tokenTerms . filter (
31+ ( tokenTerm ) => tokenTerm . length > 1
32+ ) ;
33+
34+ let termsProduct = 1 ;
35+ let overflowed = false ;
36+
37+ for ( const tokenTerm of tokenTermsThatAreMultiple ) {
38+ if ( overflowed ) {
39+ tokenTerm . splice ( 1 , tokenTerm . length - 1 ) ;
2040 } else {
21- return [ { value : token } ] ;
41+ if ( tokenTerm . length > HALF_MAX_TERMS ) {
42+ tokenTerm . splice ( HALF_MAX_TERMS , tokenTerm . length - HALF_MAX_TERMS ) ;
43+ }
44+ const product = termsProduct * tokenTerm . length ;
45+ if ( product >= MAX_TERMS ) {
46+ if ( product > MAX_TERMS ) {
47+ const max = Math . floor ( MAX_TERMS / termsProduct ) ;
48+ tokenTerm . splice ( max , tokenTerm . length - max ) ;
49+ termsProduct = max * termsProduct ;
50+ } else {
51+ termsProduct = product ;
52+ }
53+ if ( termsProduct > HALF_MAX_TERMS ) {
54+ overflowed = true ;
55+ }
56+ } else {
57+ termsProduct = product ;
58+ }
2259 }
23- } ) ;
60+ }
2461
2562 // Get all possible combinations of terms.
2663 const terms : SmartTerm [ ] = [ ] ;
2764 function combine ( index : number , carry : SmartTerm ) : void {
28- if ( index === tokenTerms . length ) {
29- terms . push ( carry ) ;
65+ if ( index === tokenTerms . length || carry . length >= MAX_TERMS ) {
66+ terms . push ( carry . slice ( 0 , MAX_TERMS ) ) ;
3067 return ;
3168 }
3269 for ( const term of tokenTerms [ index ] ) {
0 commit comments