Skip to content

Commit 897a46d

Browse files
committed
feature: add TypoOptions field to control requiring non-English words
1 parent 9cd92f5 commit 897a46d

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

server/src/data/models.rs

+2
Original file line numberDiff line numberDiff line change
@@ -5296,6 +5296,8 @@ pub struct TypoOptions {
52965296
pub two_typo_word_range: Option<TypoRange>,
52975297
/// Words that should not be corrected. If not specified, this defaults to an empty list.
52985298
pub disable_on_word: Option<Vec<String>>,
5299+
/// Auto-require non-English words present in the dataset to exist in each result's chunk_html text. If not specified, this defaults to true.
5300+
pub auto_require_non_english_words: Option<bool>,
52995301
}
53005302

53015303
#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, Default)]

server/src/operators/typo_operator.rs

+13-2
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,8 @@ fn correct_query_helper(
573573
) -> CorrectedQuery {
574574
let query_words: Vec<&str> = query.query.split_whitespace().collect();
575575
let mut corrections = HashMap::new();
576+
let mut new_quote_words = Vec::new();
577+
576578
let excluded_words: HashSet<_> = options
577579
.disable_on_word
578580
.clone()
@@ -608,7 +610,10 @@ fn correct_query_helper(
608610
continue;
609611
}
610612

611-
if !tree.find(word.to_string(), 0).is_empty() {
613+
if options.auto_require_non_english_words.unwrap_or(true)
614+
&& !tree.find(word.to_string(), 0).is_empty()
615+
{
616+
new_quote_words.push(word);
612617
query.quote_words = match query.quote_words {
613618
Some(mut existing_words) => {
614619
existing_words.push(word.to_string());
@@ -659,16 +664,22 @@ fn correct_query_helper(
659664
}
660665
}
661666

662-
if corrections.is_empty() {
667+
if corrections.is_empty() && new_quote_words.is_empty() {
663668
CorrectedQuery {
664669
query: Some(query),
665670
corrected: false,
666671
}
667672
} else {
668673
let mut corrected_query = query.query.clone();
674+
669675
for (original, correction) in corrections {
670676
corrected_query = corrected_query.replace(original, &correction);
671677
}
678+
679+
for word in new_quote_words {
680+
corrected_query = corrected_query.replace(word, &format!("\"{}\"", word));
681+
}
682+
672683
query.query = corrected_query;
673684
CorrectedQuery {
674685
query: Some(query),

0 commit comments

Comments
 (0)