Skip to content

Commit

Permalink
add stop words for highlight
Browse files (browse the repository at this point in the history)
  • Loading branch information
Endle committed Dec 21, 2023
1 parent 5ab9ccc commit 4623c53
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 6 deletions.
3 changes: 2 additions & 1 deletion fire_seq_search_server/src/language_tools/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub fn generate_stopwords_list() -> HashSet<String> {
nltk.insert("theorem");
nltk.insert("-");

nltk.insert("view");


let mut nltk: HashSet<String> = nltk.iter().map(|&s|s.into()).collect();
Expand Down Expand Up @@ -87,4 +88,4 @@ mod test_language_detect {
assert!(!is_chinese("Это статья ."));
}
}
// assert_eq!(detected_language, Some(English));
// assert_eq!(detected_language, Some(English));
5 changes: 0 additions & 5 deletions fire_seq_search_server/src/post_query/highlighter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,10 @@ pub fn highlight_keywords_in_body(body: &str, term_tokens: &Vec<String>,
show_summary_single_line_chars_limit: usize) -> String {

let blocks = split_body_to_blocks(body, show_summary_single_line_chars_limit);
// let nltk = generate_stopwords_list();
let nltk = &STOPWORDS_LIST;

let terms_selected: Vec<&str> = crate::language_tools::tokenizer::filter_out_stopwords(
&term_tokens, nltk);
// let term_ref: Vec<&str> = term_tokens.iter().map(|s| &**s).collect();
// let terms_selected: Vec<&str> = term_ref.into_iter()
// .filter(|&s| !nltk.contains(s))
// .collect();
info!("Highlight terms: {:?}", &terms_selected);


Expand Down

0 comments on commit 4623c53

Please sign in to comment.