From 0c020cbca5c4f1366d32796ff2f800fdc75cb1c1 Mon Sep 17 00:00:00 2001 From: skeptrune Date: Mon, 2 Sep 2024 15:28:20 -0700 Subject: [PATCH] feature: add TypoOption field to control requiring non-english words --- server/src/data/models.rs | 2 ++ server/src/operators/typo_operator.rs | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/server/src/data/models.rs b/server/src/data/models.rs index 1111843cd7..c194b911a2 100644 --- a/server/src/data/models.rs +++ b/server/src/data/models.rs @@ -5296,6 +5296,8 @@ pub struct TypoOptions { pub two_typo_word_range: Option, /// Words that should not be corrected. If not specified, this defaults to an empty list. pub disable_on_word: Option>, + /// Auto-require non-english words present in the dataset to exist in each result's chunk_html text. If not specified, this defaults to true. + pub auto_require_non_english_words: Option<bool>, } #[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)] diff --git a/server/src/operators/typo_operator.rs b/server/src/operators/typo_operator.rs index 01bc84be09..9e859501ce 100644 --- a/server/src/operators/typo_operator.rs +++ b/server/src/operators/typo_operator.rs @@ -573,6 +573,8 @@ fn correct_query_helper( ) -> CorrectedQuery { let query_words: Vec<&str> = query.query.split_whitespace().collect(); let mut corrections = HashMap::new(); + let mut new_quote_words = Vec::new(); + let excluded_words: HashSet<_> = options .disable_on_word .clone() @@ -608,7 +610,10 @@ fn correct_query_helper( continue; } - if !tree.find(word.to_string(), 0).is_empty() { + if options.auto_require_non_english_words.unwrap_or(true) + && !tree.find(word.to_string(), 0).is_empty() + { + new_quote_words.push(word); query.quote_words = match query.quote_words { Some(mut existing_words) => { existing_words.push(word.to_string()); @@ -659,16 +664,22 @@ fn correct_query_helper( } } - if corrections.is_empty() { + if corrections.is_empty() && new_quote_words.is_empty() { 
CorrectedQuery { query: Some(query), corrected: false, } } else { let mut corrected_query = query.query.clone(); + for (original, correction) in corrections { corrected_query = corrected_query.replace(original, &correction); } + + for word in new_quote_words { + corrected_query = corrected_query.replace(word, &format!("\"{}\"", word)); + } + query.query = corrected_query; CorrectedQuery { query: Some(query),