Skip to content

Commit

Permalink
strsim crate now covered by strsim feature.
Browse files Browse the repository at this point in the history
  • Loading branch information
leontoeides committed Nov 15, 2023
1 parent 0dade8c commit ac094fa
Show file tree
Hide file tree
Showing 41 changed files with 94 additions and 77 deletions.
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,10 @@
//!
//! // Demonstrating fuzzy matching:
//!
//! # #[cfg(feature = "strsim")]
//! let resulting_keys: Vec<&usize> = search_index.search("Harry");
//!
//! # #[cfg(feature = "strsim")]
//! assert_eq!(resulting_keys, vec![&0]);
//! ```
//!
Expand Down Expand Up @@ -268,9 +270,11 @@
//!
//! // Demonstrating fuzzy matching:
//!
//! # #[cfg(feature = "strsim")]
//! let autocomplete_options: Vec<String> =
//! search_index.autocomplete("a very big birf");
//!
//! # #[cfg(feature = "strsim")]
//! assert_eq!(
//! autocomplete_options,
//! vec!["a very big bird", "a very big birthday"]
Expand Down
25 changes: 14 additions & 11 deletions src/simple/builder.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::simple::{AutocompleteType, SearchIndex, SearchType, StrsimMetric};
use crate::simple::{AutocompleteType, EddieMetric, SearchIndex, SearchType, StrsimMetric};
use kstring::KString;
use std::collections::{BTreeMap, BTreeSet};
use std::{clone::Clone, cmp::Ord};
Expand All @@ -18,8 +18,9 @@ pub struct SearchIndexBuilder<K> {
search_type: SearchType,
autocomplete_type: AutocompleteType,
strsim_metric: Option<StrsimMetric>,
strsim_length: usize,
strsim_minimum_score: f64,
eddie_metric: Option<EddieMetric>,
fuzzy_length: usize,
fuzzy_minimum_score: f64,
split_pattern: Option<Vec<char>>,
case_sensitive: bool,
minimum_keyword_length: usize,
Expand All @@ -42,8 +43,9 @@ impl<K: Clone + Ord> From<SearchIndex<K>> for SearchIndexBuilder<K> {
search_type: search_index.search_type,
autocomplete_type: search_index.autocomplete_type,
strsim_metric: search_index.strsim_metric,
strsim_length: search_index.strsim_length,
strsim_minimum_score: search_index.strsim_minimum_score,
eddie_metric: search_index.eddie_metric,
fuzzy_length: search_index.fuzzy_length,
fuzzy_minimum_score: search_index.fuzzy_minimum_score,
split_pattern: search_index.split_pattern,
case_sensitive: search_index.case_sensitive,
minimum_keyword_length: search_index.minimum_keyword_length,
Expand All @@ -68,8 +70,9 @@ impl<K: Clone + Ord> From<SearchIndexBuilder<K>> for SearchIndex<K> {
search_type: search_index.search_type,
autocomplete_type: search_index.autocomplete_type,
strsim_metric: search_index.strsim_metric,
strsim_length: search_index.strsim_length,
strsim_minimum_score: search_index.strsim_minimum_score,
eddie_metric: search_index.eddie_metric,
fuzzy_length: search_index.fuzzy_length,
fuzzy_minimum_score: search_index.fuzzy_minimum_score,
split_pattern: search_index.split_pattern,
case_sensitive: search_index.case_sensitive,
minimum_keyword_length: search_index.minimum_keyword_length,
Expand Down Expand Up @@ -163,8 +166,8 @@ impl<K: Clone + Ord> SearchIndexBuilder<K> {
///
/// **Default:** `3` characters
#[cfg(feature = "strsim")]
pub fn strsim_length(mut self, strsim_length: usize) -> Self {
self.strsim_length = strsim_length;
pub fn fuzzy_length(mut self, fuzzy_length: usize) -> Self {
self.fuzzy_length = fuzzy_length;
self
} // fn

Expand All @@ -182,8 +185,8 @@ impl<K: Clone + Ord> SearchIndexBuilder<K> {
///
/// **Default:** `0.3`
#[cfg(feature = "strsim")]
pub fn strsim_minimum_score(mut self, strsim_minimum_score: f64) -> Self {
self.strsim_minimum_score = strsim_minimum_score;
pub fn fuzzy_minimum_score(mut self, fuzzy_minimum_score: f64) -> Self {
self.fuzzy_minimum_score = fuzzy_minimum_score;
self
} // fn

Expand Down
5 changes: 3 additions & 2 deletions src/simple/default.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::simple::{AutocompleteType, SearchIndex, SearchType, StrsimMetric};
use crate::simple::{AutocompleteType, EddieMetric, SearchIndex, SearchType, StrsimMetric};
use std::cmp::Ord;

// -----------------------------------------------------------------------------
Expand All @@ -11,7 +11,8 @@ impl<K: Ord> Default for SearchIndex<K> {
Self::new(
SearchType::Live, // Search type.
AutocompleteType::Context, // Autocompletion type.
Some(StrsimMetric::Levenshtein), // String similarity metric type.
Some(StrsimMetric::Levenshtein),// String similarity metric type.
Some(EddieMetric::Levenshtein), // String similarity metric type.
3, // String similarity match length.
0.3, // String similarity minimum score.
// Default split pattern:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = normalized_damerau_levenshtein(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
2 changes: 1 addition & 1 deletion src/simple/internal/eddie/autocomplete/context_jaro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = jaro(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = jaro_winkler(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = normalized_levenshtein(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = sorensen_dice(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = normalized_damerau_levenshtein(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
2 changes: 1 addition & 1 deletion src/simple/internal/eddie/autocomplete/global_jaro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = jaro(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = jaro_winkler(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = normalized_levenshtein(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = sorensen_dice(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
6 changes: 3 additions & 3 deletions src/simple/internal/eddie/eddie_context_autocomplete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ impl<K: Hash + Ord> SearchIndex<K> {
// will be fuzzy matched against every keyword in the index. This is OK
// (or even desirable) if the search index isn't large; however, this
// will be cripplingly slow on very large search indices.
let index_range: &str = if self.strsim_length > 0 {
let index_range: &str = if self.fuzzy_length > 0 {
// The user keyword must be at least as long as the match length to be
// evaluated for fuzzy-matches:
if user_keyword.len() >= self.strsim_length {
if user_keyword.len() >= self.fuzzy_length {
// Use the first _n_ characters of the user's keyword to find
// search index keywords to compare against:
&user_keyword[0..self.strsim_length]
&user_keyword[0..self.fuzzy_length]
} else {
// The user's keyword is too short. Do not perform any fuzzy
// matching:
Expand Down
6 changes: 3 additions & 3 deletions src/simple/internal/eddie/eddie_global_autocomplete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ impl<K: Hash + Ord> SearchIndex<K> {
// will be fuzzy matched against every keyword in the index. This is OK
// (or even desirable) if the search index isn't large; however, this
// will be cripplingly slow on very large search indices.
let index_range: &str = if self.strsim_length > 0 {
let index_range: &str = if self.fuzzy_length > 0 {
// The user keyword must be at least as long as the match length to be
// evaluated for fuzzy-matches:
if user_keyword.len() >= self.strsim_length {
if user_keyword.len() >= self.fuzzy_length {
// Use the first _n_ characters of the user's keyword to find
// search index keywords to compare against:
&user_keyword[0..self.strsim_length]
&user_keyword[0..self.fuzzy_length]
} else {
// The user's keyword is too short. Do not perform any fuzzy
// matching:
Expand Down
6 changes: 3 additions & 3 deletions src/simple/internal/eddie/eddie_global_keyword.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ impl<K: Hash + Ord> SearchIndex<K> {
// will be fuzzy matched against every keyword in the index. This is OK
// (or even desirable) if the search index isn't large; however, this
// will be cripplingly slow on very large search indices.
let index_range: &str = if self.strsim_length > 0 {
let index_range: &str = if self.fuzzy_length > 0 {
// The user keyword must be at least as long as the match length to be
// evaluated for fuzzy-matches:
if user_keyword.len() >= self.strsim_length {
if user_keyword.len() >= self.fuzzy_length {
// Use the first _n_ characters of the user's keyword to find
// search index keywords to compare against:
&user_keyword[0..self.strsim_length]
&user_keyword[0..self.fuzzy_length]
} else {
// The user's keyword is too short. Do not perform any fuzzy
// matching:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ impl<K: Ord> SearchIndex<K> {
) // map
// Search index keyword must meet minimum score to be considered as
// a fuzzy match:
.filter(|(_keyword, score)| score >= &self.strsim_minimum_score)
.filter(|(_keyword, score)| score >= &self.fuzzy_minimum_score)
// Find the `(keyword, score)` tuple with the highest score:
.max_by(|(_a_keyword, a_score), (_b_keyword, b_score)|
a_score.partial_cmp(b_score).unwrap()
Expand Down
2 changes: 1 addition & 1 deletion src/simple/internal/eddie/keyword/global_jaro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl<K: Ord> SearchIndex<K> {
) // map
// Search index keyword must meet minimum score to be considered as
// a fuzzy match:
.filter(|(_keyword, score)| score >= &self.strsim_minimum_score)
.filter(|(_keyword, score)| score >= &self.fuzzy_minimum_score)
// Find the `(keyword, score)` tuple with the highest score:
.max_by(|(_a_keyword, a_score), (_b_keyword, b_score)|
a_score.partial_cmp(b_score).unwrap()
Expand Down
2 changes: 1 addition & 1 deletion src/simple/internal/eddie/keyword/global_jaro_winkler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl<K: Ord> SearchIndex<K> {
) // map
// Search index keyword must meet minimum score to be considered as
// a fuzzy match:
.filter(|(_keyword, score)| score >= &self.strsim_minimum_score)
.filter(|(_keyword, score)| score >= &self.fuzzy_minimum_score)
// Find the `(keyword, score)` tuple with the highest score:
.max_by(|(_a_keyword, a_score), (_b_keyword, b_score)|
a_score.partial_cmp(b_score).unwrap()
Expand Down
2 changes: 1 addition & 1 deletion src/simple/internal/eddie/keyword/global_levenshtein.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl<K: Ord> SearchIndex<K> {
) // map
// Search index keyword must meet minimum score to be considered as
// a fuzzy match:
.filter(|(_keyword, score)| score >= &self.strsim_minimum_score)
.filter(|(_keyword, score)| score >= &self.fuzzy_minimum_score)
// Find the `(keyword, score)` tuple with the highest score:
.max_by(|(_a_keyword, a_score), (_b_keyword, b_score)|
a_score.partial_cmp(b_score).unwrap()
Expand Down
2 changes: 1 addition & 1 deletion src/simple/internal/eddie/keyword/global_sorensen_dice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl<K: Ord> SearchIndex<K> {
) // map
// Search index keyword must meet minimum score to be considered as
// a fuzzy match:
.filter(|(_keyword, score)| score >= &self.strsim_minimum_score)
.filter(|(_keyword, score)| score >= &self.fuzzy_minimum_score)
// Find the `(keyword, score)` tuple with the highest score:
.max_by(|(_a_keyword, a_score), (_b_keyword, b_score)|
a_score.partial_cmp(b_score).unwrap()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = normalized_damerau_levenshtein(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
2 changes: 1 addition & 1 deletion src/simple/internal/strsim/autocomplete/context_jaro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = jaro(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = jaro_winkler(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = normalized_levenshtein(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = sorensen_dice(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = normalized_damerau_levenshtein(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
2 changes: 1 addition & 1 deletion src/simple/internal/strsim/autocomplete/global_jaro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = jaro(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl<K: Hash + Ord> SearchIndex<K> {
let score = jaro_winkler(index_keyword, user_keyword);
// Insert the score into the top scores (if it's normal and high
// enough):
if score.is_normal() && score >= self.strsim_minimum_score {
if score.is_normal() && score >= self.fuzzy_minimum_score {
top_scores.insert(index_keyword, index_keys, score)
} // if
}); // for_each
Expand Down
Loading

0 comments on commit ac094fa

Please sign in to comment.