Skip to content

Commit

Permalink
Add data.match_from_pairs fuzzy symbology scanner
Browse files Browse the repository at this point in the history
A helper for fuzzy-scanning, via `rapidfuzz`, the "pairs table" that
most backends should expose as part of their (internal) symbology set;
the input table is expected to be a `dict[str, Struct]`.

Also expose the `data.types.Struct` at the subpkg top level.
  • Loading branch information
goodboy committed Sep 22, 2023
1 parent a97a0ce commit 0ba75df
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
3 changes: 3 additions & 0 deletions piker/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@
SymbologyCache,
open_symcache,
get_symcache,
match_from_pairs,
)
from ._sampling import open_sample_stream
from ..types import Struct


__all__: list[str] = [
Expand All @@ -62,6 +64,7 @@
'open_symcache',
'open_sample_stream',
'get_symcache',
'Struct',
'SymbologyCache',
'types',
]
40 changes: 39 additions & 1 deletion piker/data/_symcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def search(
matches in a `dict` including the `MktPair` values.
'''
matches = fuzzy.extractBests(
matches = fuzzy.extract(
pattern,
getattr(self, table),
score_cutoff=50,
Expand Down Expand Up @@ -466,3 +466,41 @@ async def sched_gen_symcache():
pdbp.xpm()

return symcache


def match_from_pairs(
    pairs: dict[str, Struct],
    query: str,
    score_cutoff: int = 50,

) -> dict[str, Struct]:
    '''
    Fuzzy search over a "pairs table" maintained by most backends
    as part of their symbology-info caching internals.

    Scan the native symbol key set of `pairs` for keys which fuzzy
    match `query` and return the matched entries repacked in a new
    `dict`; the input table is only read, never mutated.

    Parameters:
    - `pairs`: table mapping native symbol keys to their pair
      structs.
    - `query`: the search pattern scored against every key.
    - `score_cutoff`: passed through unchanged to `fuzzy.extract()`
      as its `score_cutoff` argument.

    Returns a `dict` holding, for every match reported by
    `fuzzy.extract()`, the matched key and its value from `pairs`,
    inserted in the order the matches were reported.

    NOTE(review): the number of matches is whatever
    `fuzzy.extract()` yields with its default settings (no explicit
    limit is passed here) - confirm this is the intended cap.

    '''
    # TODO: somehow cache this list (per call) like we were in
    # `open_symbol_search()`?
    keys: list[str] = list(pairs)

    matches: list[tuple[
        Sequence[Hashable],  # matching input key
        Any,  # scores
        Any,
    ]] = fuzzy.extract(
        # NOTE: most backends provide keys uppercased
        query=query,
        choices=keys,
        score_cutoff=score_cutoff,
    )

    # repack the matched entries into a fresh output dict; the first
    # element of every match tuple is the matched key from `keys`.
    matched_pairs: dict[str, Struct] = {
        match[0]: pairs[match[0]]
        for match in matches
    }
    return matched_pairs

0 comments on commit 0ba75df

Please sign in to comment.