Skip to content
This repository was archived by the owner on Jul 31, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 6 additions & 12 deletions crates/meilisearch/tests/search/locales.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,23 +147,20 @@ async fn simple_search() {
.search(
json!({"q": "進撃", "locales": ["jpn"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"hits": [
{
"id": 852
},
{
"id": 853
}
],
"query": "進撃",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
"estimatedTotalHits": 1
}
"###);
"#);
snapshot!(code, @"200 OK");
},
)
Expand All @@ -172,23 +169,20 @@ async fn simple_search() {
// chinese
index
.search(json!({"q": "进击", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"hits": [
{
"id": 853
},
{
"id": 852
}
],
"query": "进击",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
"estimatedTotalHits": 1
}
"###);
"#);
snapshot!(code, @"200 OK");
})
.await;
Expand Down
62 changes: 60 additions & 2 deletions crates/milli/src/search/new/query_term/parse_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,11 @@ pub fn number_of_typos_allowed<'ctx>(

Ok(Box::new(move |word: &str| {
if !authorize_typos
|| word.len() < min_len_one_typo as usize
|| word.chars().count() < min_len_one_typo as usize
|| exact_words.as_ref().is_some_and(|fst| fst.contains(word))
{
0
} else if word.len() < min_len_two_typos as usize {
} else if word.chars().count() < min_len_two_typos as usize {
1
} else {
2
Expand Down Expand Up @@ -380,4 +380,62 @@ mod tests {

Ok(())
}

#[test]
fn test_unicode_typo_tolerance_fixed() -> Result<()> {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn()?;
let ctx = SearchContext::new(&temp_index, &rtxn)?;

let nbr_typos = number_of_typos_allowed(&ctx)?;

// ASCII word "doggy" (5 chars, 5 bytes)
let ascii_word = "doggy";
let ascii_typos = nbr_typos(ascii_word);

// Cyrillic word "собак" (5 chars, 10 bytes)
let cyrillic_word = "собак";
let cyrillic_typos = nbr_typos(cyrillic_word);

// Both words have 5 characters, so they should have the same typo tolerance
assert_eq!(
ascii_typos, cyrillic_typos,
"Words with same character count should get same typo tolerance"
);

// With default settings (oneTypo=5, twoTypos=9), 5-char words should get 1 typo
assert_eq!(ascii_typos, 1, "5-character word should get 1 typo tolerance");
assert_eq!(cyrillic_typos, 1, "5-character word should get 1 typo tolerance");

Ok(())
}

#[test]
fn test_various_unicode_scripts() -> Result<()> {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn()?;
let ctx = SearchContext::new(&temp_index, &rtxn)?;

let nbr_typos = number_of_typos_allowed(&ctx)?;

// Let's use 5-character words for consistent testing
let five_char_words = vec![
("doggy", "ASCII"), // 5 chars, 5 bytes
("café!", "Accented"), // 5 chars, 7 bytes
("собак", "Cyrillic"), // 5 chars, 10 bytes
];

let expected_typos = 1; // With default settings, 5-char words get 1 typo

for (word, script) in five_char_words {
let typos = nbr_typos(word);
assert_eq!(
typos, expected_typos,
"{} word '{}' should get {} typo(s)",
script, word, expected_typos
);
}

Ok(())
}
}