From 0ba75df877e189dfcde4f33e14960f77ca075e52 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 22 Sep 2023 13:53:18 -0400
Subject: [PATCH] Add `data.match_from_pairs` fuzzy symbology scanner

A helper for scanning a "pairs table" that most backends should expose
as part of their (internal) symbology set using `rapidfuzz` over
a `dict[str, Struct]` input table.

Also expose the `data.types.Struct` at the subpkg top level.
---
 piker/data/__init__.py  |  4 ++++
 piker/data/_symcache.py | 44 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/piker/data/__init__.py b/piker/data/__init__.py
index 9b12697e9..25c2912a5 100644
--- a/piker/data/__init__.py
+++ b/piker/data/__init__.py
@@ -43,8 +43,10 @@
     SymbologyCache,
     open_symcache,
     get_symcache,
+    match_from_pairs,
 )
 from ._sampling import open_sample_stream
+from ..types import Struct
 
 
 __all__: list[str] = [
@@ -62,6 +64,8 @@
     'open_symcache',
     'open_sample_stream',
     'get_symcache',
+    'match_from_pairs',
+    'Struct',
     'SymbologyCache',
     'types',
 ]
diff --git a/piker/data/_symcache.py b/piker/data/_symcache.py
index 1ba724a2f..44057e89d 100644
--- a/piker/data/_symcache.py
+++ b/piker/data/_symcache.py
@@ -308,7 +308,7 @@ def search(
         matches in a `dict` including the `MktPair` values.
 
         '''
-        matches = fuzzy.extractBests(
+        matches = fuzzy.extract(
             pattern,
             getattr(self, table),
             score_cutoff=50,
@@ -466,3 +466,45 @@ async def sched_gen_symcache():
         pdbp.xpm()
 
     return symcache
+
+
+def match_from_pairs(
+    pairs: dict[str, Struct],
+    query: str,
+    score_cutoff: int = 50,
+
+) -> dict[str, Struct]:
+    '''
+    Fuzzy search over a "pairs table" maintained by most backends
+    as part of their symbology-info caching internals.
+
+    Scan the native symbol key set of `pairs` against `query`, handing
+    `score_cutoff` to the matcher as its score threshold, and return
+    the matched entries in a new `dict`, keyed as in the input and
+    inserted in the order the matcher reports them.
+
+    NOTE: no `limit` is passed to the matcher so its own default still
+    applies; presumably only the top few matches are ever returned,
+    confirm against the `fuzzy.extract()` (`rapidfuzz`) docs.
+
+    '''
+
+    # TODO: somehow cache this list (per call) like we were in
+    # `open_symbol_search()`?
+    keys: list[str] = list(pairs)
+    matches: list[tuple[
+        str,  # matching input key
+        Any,  # score
+        Any,  # presumably the key's index in `keys` (confirm)
+    ]] = fuzzy.extract(
+        # NOTE: most backends provide keys uppercased
+        query=query,
+        choices=keys,
+        score_cutoff=score_cutoff,
+    )
+
+    # repack the matched pairs into a new output dict
+    return {
+        pair_key: pairs[pair_key]
+        for pair_key, *_ in matches
+    }