@@ -409,3 +409,92 @@ fn chapter_settings_priority() {
         );
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tokenize_basic() {
+        assert_eq!(tokenize("hello world"), vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_tokenize_with_hyphens() {
+        assert_eq!(
+            tokenize("hello-world test-case"),
+            vec!["hello", "world", "test", "case"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_mixed_whitespace() {
+        assert_eq!(
+            tokenize("hello\tworld\ntest\r\ncase"),
+            vec!["hello", "world", "test", "case"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_empty_string() {
+        assert_eq!(tokenize(""), Vec::<String>::new());
+    }
+
+    #[test]
+    fn test_tokenize_only_whitespace() {
+        assert_eq!(tokenize("   \t\n   "), Vec::<String>::new());
+    }
+
+    #[test]
+    fn test_tokenize_case_normalization() {
+        assert_eq!(tokenize("Hello WORLD Test"), vec!["hello", "world", "test"]);
+    }
+
+    #[test]
+    fn test_tokenize_trim_whitespace() {
+        assert_eq!(tokenize("  hello   world  "), vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_tokenize_long_words_filtered() {
+        let long_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX + 1);
+        let short_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX);
+        let input = format!("{} hello {}", long_word, short_word);
+        assert_eq!(tokenize(&input), vec!["hello", &short_word]);
+    }
+
+    #[test]
+    fn test_tokenize_max_length_word() {
+        let max_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX);
+        assert_eq!(tokenize(&max_word), vec![max_word]);
+    }
+
+    #[test]
+    fn test_tokenize_special_characters() {
+        assert_eq!(
+            tokenize("hello,world.test!case?"),
+            vec!["hello,world.test!case?"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_unicode() {
+        assert_eq!(
+            tokenize("café naïve résumé"),
+            vec!["café", "naïve", "résumé"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_unicode_rtl_hebrew() {
+        assert_eq!(tokenize("שלום עולם"), vec!["שלום", "עולם"]);
+    }
+
+    #[test]
+    fn test_tokenize_numbers() {
+        assert_eq!(
+            tokenize("test123 456-789 hello"),
+            vec!["test123", "456", "789", "hello"]
+        );
+    }
+}
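
The `tokenize` under test is outside this hunk. For reference, here is a minimal sketch that would pass the tests above, assuming lowercasing, splitting on whitespace and hyphens, and a length cutoff; the constant's value is a placeholder, not the crate's actual setting:

```rust
// Sketch only: the real tokenize and MAX_WORD_LENGTH_TO_INDEX live elsewhere
// in the crate; 64 is an assumed placeholder value.
const MAX_WORD_LENGTH_TO_INDEX: usize = 64;

fn tokenize(text: &str) -> Vec<String> {
    text.to_lowercase()
        // Split on whitespace and hyphens, matching the hyphen and
        // mixed-whitespace tests; other punctuation stays inside tokens.
        .split(|c: char| c.is_whitespace() || c == '-')
        // Drop empty fragments from leading/trailing/repeated separators,
        // and skip words over the index limit (byte length here; the
        // length tests only exercise ASCII).
        .filter(|word| !word.is_empty() && word.len() <= MAX_WORD_LENGTH_TO_INDEX)
        .map(str::to_string)
        .collect()
}
```

Treating hyphens as separators alongside whitespace is what makes both `test_tokenize_with_hyphens` and the `456-789` case in `test_tokenize_numbers` pass, while `hello,world.test!case?` remains a single token.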