Skip to content

Commit

Permalink
normalizedStr adjustments
Browse files Browse the repository at this point in the history
In order to accommodate the much larger namespace, we're going to require users to sometimes be more specific in naming things. To support this, I've made the normalizer a little less aggressive. This should be followed by some changes which make the lookup system a little more aggressive and also explain in more detail which ambiguities arise.
  • Loading branch information
Oliver Smith committed Nov 28, 2014
1 parent e031c8d commit 6bb2241
Showing 1 changed file with 15 additions and 8 deletions.
23 changes: 15 additions & 8 deletions tradedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,12 @@ class TradeDB(object):
normalizedStr - Normalizes a search index string.
"""

normalizeRe = re.compile(r'[ \t\'\"\.\-_]')
# Translation map for normalizing strings
normalizeTrans = str.maketrans(
'abcdefghijklmnopqrstuvwxyz',
'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
'[]()*+-.,{}:'
)
# The DB cache
defaultDB = './data/TradeDangerous.db'
# File containing SQL to build the DB cache from
Expand Down Expand Up @@ -1024,17 +1029,18 @@ def listSearch(listType, lookup, values, key=lambda item: item, val=lambda item:
class ListSearchMatch(namedtuple('Match', [ 'key', 'value' ])):
pass

needle = TradeDB.normalizedStr(lookup)
normTrans = TradeDB.normalizeTrans
needle = lookup.translate(normTrans)
partialMatches, wordMatches = [], []
# make a regex to match whole words
wordRe = re.compile(r'\b{}\b'.format(lookup), re.IGNORECASE)
# describe a match
for entry in values:
entryKey = key(entry)
normVal = TradeDB.normalizedStr(entryKey)
normVal = entryKey.translate(normTrans)
if normVal.find(needle) > -1:
# If this is an exact match, ignore ambiguities.
if normVal == needle:
if len(normVal) == len(needle):
return val(entry)
match = ListSearchMatch(entryKey, val(entry))
if wordRe.match(entryKey):
Expand All @@ -1056,11 +1062,12 @@ class ListSearchMatch(namedtuple('Match', [ 'key', 'value' ])):


@staticmethod
def normalizedStr(str):
def normalizedStr(text):
"""
Returns a case folded, sanitized version of 'str' suitable for
performing simple and partial matches against. Removes whitespace,
hyphens, underscores, periods and apostrophes.
performing simple and partial matches against. Removes various
punctuation characters that don't contribute to name uniqueness.
NOTE: No-longer removes whitespaces or apostrophes.
"""
return TradeDB.normalizeRe.sub('', str).casefold()
return text.translate(TradeDB.normalizeTrans)

0 comments on commit 6bb2241

Please sign in to comment.