Skip to content

Commit

Permalink
ranked match: prefer real word boundaries to camelCase boundaries
Browse files Browse the repository at this point in the history
This enables the test case from
mawww#4653 (comment)
and a few others from that PR.
  • Loading branch information
krobelus committed Nov 16, 2023
1 parent eeba729 commit a0281d7
Showing 1 changed file with 24 additions and 13 deletions.
37 changes: 24 additions & 13 deletions src/ranked_match.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,23 @@ bool matches(UsedLetters query, UsedLetters letters)

using Utf8It = utf8::iterator<const char*>;

static int count_word_boundaries_match(StringView candidate, StringView query)
// Result of count_word_boundaries_match: how well a query lines up with the
// word boundaries of a candidate.
struct WordBoundaries {
// Number of query characters that were matched against a word-boundary
// character of the candidate (incremented once per match).
int count;
// Same matches as count, but each one adds the weight of the boundary it
// matched on: 2 for the start of the candidate or a transition from a
// non-alphanumeric to an alphanumeric character, 1 for a lowercase to
// uppercase (camelCase) transition.
int weighted_count;
};

static WordBoundaries count_word_boundaries_match(StringView candidate, StringView query)
{
int count = 0;
int weighted_count = 0;
Utf8It query_it{query.begin(), query};
Codepoint prev = 0;
for (Utf8It it{candidate.begin(), candidate}; it != candidate.end(); ++it)
{
const Codepoint c = *it;
const bool is_word_boundary = prev == 0 or
(!iswalnum((wchar_t)prev) and iswalnum((wchar_t)c)) or
(iswlower((wchar_t)prev) and iswupper((wchar_t)c));
const int is_word_boundary = 2 * (prev == 0 or
(!iswalnum((wchar_t)prev) and iswalnum((wchar_t)c))) +
1 * (iswlower((wchar_t)prev) and iswupper((wchar_t)c));
prev = c;

if (not is_word_boundary)
Expand All @@ -59,14 +65,15 @@ static int count_word_boundaries_match(StringView candidate, StringView query)
if (qc == (iswlower((wchar_t)qc) ? lc : c))
{
++count;
weighted_count += is_word_boundary;
query_it = qit+1;
break;
}
}
if (query_it == query.end())
break;
}
return count;
return WordBoundaries{count, weighted_count};
}

static bool smartcase_eq(Codepoint candidate, Codepoint query)
Expand Down Expand Up @@ -163,8 +170,9 @@ RankedMatch::RankedMatch(StringView candidate, StringView query, TestFunc func)
}
}

m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
if (m_word_boundary_match_count == query.length())
auto word_boundaries = count_word_boundaries_match(candidate, query);
m_word_boundary_match_count = word_boundaries.weighted_count;
if (word_boundaries.count == query.length())
m_flags |= Flags::OnlyWordBoundary;
}

Expand Down Expand Up @@ -198,7 +206,7 @@ bool RankedMatch::operator<(const RankedMatch& other) const

// If we are SingleWord, we don't want to take word boundaries from other
// words into account.
if (not (m_flags & (Flags::Prefix | Flags::SingleWord)) and
if (not (m_flags & Flags::Prefix) and
m_word_boundary_match_count != other.m_word_boundary_match_count)
return m_word_boundary_match_count > other.m_word_boundary_match_count;

Expand Down Expand Up @@ -252,11 +260,11 @@ UnitTest test_ranked_match{[] {
return RankedMatch{better, query} < RankedMatch{worse, query};
};

kak_assert(count_word_boundaries_match("run_all_tests", "rat") == 3);
kak_assert(count_word_boundaries_match("run_all_tests", "at") == 2);
kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "wm") == 2);
kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cobm") == 3);
kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cWBM") == 4);
kak_assert(count_word_boundaries_match("run_all_tests", "rat").count == 3);
kak_assert(count_word_boundaries_match("run_all_tests", "at").count == 2);
kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "wm").count == 2);
kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cobm").count == 3);
kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cWBM").count == 4);
kak_assert(preferred("so", "source", "source_data"));
kak_assert(not preferred("so", "source_data", "source"));
kak_assert(not preferred("so", "source", "source"));
Expand Down Expand Up @@ -286,6 +294,9 @@ UnitTest test_ranked_match{[] {
kak_assert(preferred("codegen", "clang/test/CodeGen/asm.c", "clang/test/ASTMerge/codegen-body/test.c"));
kak_assert(preferred("cho", "tchou kanaky", "tachou kanay")); // Prefer the leftmost match.
kak_assert(preferred("clangd", "clang-tools-extra/clangd/README.md", "clang/docs/conf.py"));
kak_assert(preferred("luaremote", "src/script/LuaRemote.cpp", "tests/TestLuaRemote.cpp"));
kak_assert(preferred("rm.cc", "src/ranked_match.cc", "test/README.asciidoc"));
kak_assert(preferred("fooo", "foo.o", "fo.o.o"));
}};

UnitTest test_used_letters{[]()
Expand Down

0 comments on commit a0281d7

Please sign in to comment.