Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 14 additions & 29 deletions http-ui/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use milli::update::{
ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
};
use milli::{
obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatchingWords,
obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatcherBuilder,
SearchResult, SortError,
};
use once_cell::sync::OnceCell;
Expand Down Expand Up @@ -152,43 +152,25 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
Self { analyzer }
}

fn highlight_value(&self, value: Value, matching_words: &MatchingWords) -> Value {
fn highlight_value(&self, value: Value, matcher_builder: &MatcherBuilder) -> Value {
match value {
Value::Null => Value::Null,
Value::Bool(boolean) => Value::Bool(boolean),
Value::Number(number) => Value::Number(number),
Value::String(old_string) => {
let mut string = String::new();
let analyzed = self.analyzer.analyze(&old_string);
for (word, token) in analyzed.reconstruct() {
if token.is_word() {
match matching_words.matching_bytes(&token) {
Some(chars_to_highlight) => {
let mut chars = word.chars();

string.push_str("<mark>");
// push the part to highlight
string.extend(chars.by_ref().take(chars_to_highlight));
string.push_str("</mark>");
// push the suffix after highlight
string.extend(chars);
}
// no highlight
None => string.push_str(word),
}
} else {
string.push_str(word);
}
}
Value::String(string)
let analyzed: Vec<_> = analyzed.tokens().collect();
let mut matcher = matcher_builder.build(&analyzed[..], &old_string);

Value::String(matcher.format(true, true).to_string())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't quite like this format(true, true) thing as it is not clear what those too booleans mean, but we can open an issue and address this later.

Copy link
Member Author

@ManyTheFish ManyTheFish Apr 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can move them into the builder with a specific method for each 🤔

Copy link
Member Author

@ManyTheFish ManyTheFish Apr 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose to use FormatOption struct coming from Meilisearch which will ease the future integration.

}
Value::Array(values) => Value::Array(
values.into_iter().map(|v| self.highlight_value(v, matching_words)).collect(),
values.into_iter().map(|v| self.highlight_value(v, matcher_builder)).collect(),
),
Value::Object(object) => Value::Object(
object
.into_iter()
.map(|(k, v)| (k, self.highlight_value(v, matching_words)))
.map(|(k, v)| (k, self.highlight_value(v, matcher_builder)))
.collect(),
),
}
Expand All @@ -197,14 +179,14 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
fn highlight_record(
&self,
object: &mut Map<String, Value>,
matching_words: &MatchingWords,
matcher_builder: &MatcherBuilder,
attributes_to_highlight: &HashSet<String>,
) {
// TODO do we need to create a string for element that are not and needs to be highlight?
for (key, value) in object.iter_mut() {
if attributes_to_highlight.contains(key) {
let old_value = mem::take(value);
*value = self.highlight_value(old_value, matching_words);
*value = self.highlight_value(old_value, matcher_builder);
}
}
}
Expand Down Expand Up @@ -819,12 +801,15 @@ async fn main() -> anyhow::Result<()> {
let stop_words = fst::Set::default();
let highlighter = Highlighter::new(&stop_words);

let mut matcher_builder = MatcherBuilder::from_matching_words(matching_words);
matcher_builder.highlight_prefix("<mark>".to_string());
matcher_builder.highlight_suffix("</mark>".to_string());
for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() {
let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap();
if !disable_highlighting {
highlighter.highlight_record(
&mut object,
&matching_words,
&matcher_builder,
&attributes_to_highlight,
);
}
Expand Down
4 changes: 3 additions & 1 deletion milli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ pub use self::heed_codec::{
RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec,
};
pub use self::index::Index;
pub use self::search::{FacetDistribution, Filter, MatchingWords, Search, SearchResult};
pub use self::search::{
FacetDistribution, Filter, MatchBounds, MatcherBuilder, MatchingWords, Search, SearchResult,
};

pub type Result<T> = std::result::Result<T, error::Error>;

Expand Down
Loading