Skip to content

Commit

Permalink
Don't allocate full fuzzy scoring matrix when only ranking matches
Browse files Browse the repository at this point in the history
We can compute the fuzzy matching score without keeping the scores of
all prefixes in memory: when only ranking matches, just the last two
rows of the score matrix are kept.
Currently, the full score matrix is only used for debugging. In the
future, I want to use it to get the positions of matched characters
(which could be underlined in the UI).
  • Loading branch information
krobelus committed Nov 11, 2023
1 parent c452313 commit 8cd981b
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 13 deletions.
2 changes: 1 addition & 1 deletion gdb/kakoune.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,5 +235,5 @@ def build_pretty_printer():
pp.add_printer('ByteCount', '^Kakoune::ByteCount$', ByteCount)
pp.add_printer('Color', '^Kakoune::Color$', Color)
pp.add_printer('Regex', '^Kakoune::Regex$', Regex)
pp.add_printer('SubsequenceDistance', '^Kakoune::SubsequenceDistance$', SubsequenceDistance)
pp.add_printer('SubsequenceDistance', '^Kakoune::SubsequenceDistance<true>$', SubsequenceDistance)
return pp
37 changes: 25 additions & 12 deletions src/ranked_match.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,16 @@ struct Distance
int distance_ending_in_gap = 0;
};

template<bool full_matrix>
class SubsequenceDistance
{
public:
SubsequenceDistance(const RankedMatchQuery& query, StringView candidate)
: query{query}, candidate{candidate},
stride{candidate.char_length() + 1},
m_matrix{(size_t)((query_length(query) + 1) * stride)} {}
m_matrix{(size_t)(
(full_matrix ? (query_length(query) + 1) : 2)
* stride)} {}

ArrayView<Distance, CharCount> operator[](CharCount query_i)
{
Expand Down Expand Up @@ -132,7 +135,8 @@ class SubsequenceDistance
static constexpr int infinity = std::numeric_limits<int>::max();
constexpr int max_index_weight = 1;

static SubsequenceDistance subsequence_distance(const RankedMatchQuery& query, StringView candidate)
template<bool full_matrix>
static SubsequenceDistance<full_matrix> subsequence_distance(const RankedMatchQuery& query, StringView candidate)
{
auto match_bonus = [](bool starts_word, bool is_same_case) -> int {
return -150 * starts_word
Expand All @@ -142,7 +146,7 @@ static SubsequenceDistance subsequence_distance(const RankedMatchQuery& query, S
constexpr int gap_weight = 200;
constexpr int gap_extend_weight = 1;

SubsequenceDistance distance{query, candidate};
SubsequenceDistance<full_matrix> distance{query, candidate};

CharCount candidate_length = candidate.char_length();

Expand All @@ -156,8 +160,17 @@ static SubsequenceDistance subsequence_distance(const RankedMatchQuery& query, S
query_i <= query_length(query);
query_i++)
{
auto row = distance[query_i];
auto prev_row = distance[query_i-1];
CharCount query_virtual_i = query_i;
CharCount prev_query_virtual_i = query_i - 1;
// Only keep the last two rows in memory, swapping them in each iteration.
if constexpr (not full_matrix)
{
query_virtual_i %= 2;
prev_query_virtual_i %= 2;
}

auto row = distance[query_virtual_i];
auto prev_row = distance[prev_query_virtual_i];

// Since we only allow subsequence matches, we don't need deletions.
// This rules out prefix-matches where the query is longer than the
Expand Down Expand Up @@ -248,9 +261,9 @@ RankedMatch::RankedMatch(StringView candidate, const RankedMatchQuery& query, Te
m_candidate = candidate;
m_matches = true;

auto distance = subsequence_distance(query, bounded_candidate);
auto distance = subsequence_distance<false>(query, bounded_candidate);

m_distance = distance[query_length(query)][bounded_candidate.char_length()].distance
m_distance = distance[query_length(query) % 2][bounded_candidate.char_length()].distance
+ (int)distance.max_index * max_index_weight;
}

Expand Down Expand Up @@ -328,7 +341,7 @@ static_assert(log2(3) == 1);
static_assert(log2(4) == 2);

// returns a string representation of the distance matrix, for debugging only
[[maybe_unused]] static String to_string(const SubsequenceDistance& distance)
[[maybe_unused]] static String to_string(const SubsequenceDistance<true>& distance)
{
const RankedMatchQuery& query = distance.query;
StringView candidate = distance.candidate;
Expand Down Expand Up @@ -378,13 +391,13 @@ static_assert(log2(4) == 2);
UnitTest test_ranked_match{[] {
// Convenience variables, for debugging only.
Optional<RankedMatchQuery> q;
Optional<SubsequenceDistance> distance_better;
Optional<SubsequenceDistance> distance_worse;
Optional<SubsequenceDistance<true>> distance_better;
Optional<SubsequenceDistance<true>> distance_worse;

auto preferred = [&](StringView query, StringView better, StringView worse) -> bool {
q = RankedMatchQuery{query};
distance_better = subsequence_distance(*q, better);
distance_worse = subsequence_distance(*q, worse);
distance_better = subsequence_distance<true>(*q, better);
distance_worse = subsequence_distance<true>(*q, worse);
return RankedMatch{better, *q} < RankedMatch{worse, *q};
};

Expand Down

0 comments on commit 8cd981b

Please sign in to comment.