From cc5a02df7d00b5d752b814d34df0ea3dcbf6a4b3 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 30 Nov 2014 13:44:19 -0800 Subject: [PATCH] System/Station disambiguation improvements TradeDB.lookupPlace now supports disambiguation of system and station names. System or Station: aulin, asellusprim or beagle2 Explicit System: @asellus Explicit Station: /beagle or @/beagle System/Station: primus/beag Overkill: "@asellus primus/beagle 2 landing" --- tradedb.py | 181 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 110 insertions(+), 71 deletions(-) diff --git a/tradedb.py b/tradedb.py index 7300bd9e..7ec8d06d 100644 --- a/tradedb.py +++ b/tradedb.py @@ -36,27 +36,27 @@ class AmbiguityError(TradeException): Attributes: lookupType - description of what was being queried, searchKey - the key given to the search routine, - candidates - list of candidates + anyMatch - list of anyMatch key - retrieve the display string for a candidate """ - def __init__(self, lookupType, searchKey, candidates, key=lambda item:item): + def __init__(self, lookupType, searchKey, anyMatch, key=lambda item:item): self.lookupType = lookupType self.searchKey = searchKey - self.candidates = candidates + self.anyMatch = anyMatch self.key = key def __str__(self): - candidates, key = self.candidates, self.key - if len(candidates) > 10: + anyMatch, key = self.anyMatch, self.key + if len(anyMatch) > 10: opportunities = ", ".join([ - key(c) for c in candidates[:10] + key(c) for c in anyMatch[:10] ] + ["..."]) else: opportunities = ", ".join([ - key(c) for c in candidates[0:-1] + key(c) for c in anyMatch[0:-1] ]) - opportunities += " or " + key(candidates[-1]) + opportunities += " or " + key(anyMatch[-1]) return '{} lookup: "{}" could match {}'.format( self.lookupType, str(self.searchKey), opportunities @@ -477,7 +477,7 @@ def lookupSystemRelaxed(self, key): return place except AmbiguityError as e: # See if the ambiguity resolves down to a single system. - for candidate in e.candidates: + for candidate in e.anyMatch: if isinstance(candidate, Station): systems.add(candidate.system) else: @@ -559,10 +559,6 @@ def _loadStations(self): self.tdenv.DEBUG1("Loaded {:n} Stations", len(stationByID)) - def lookupSystemAndStation(self, systemName, stationName): - raise Exception("Not implemented yet") - - def lookupPlace(self, name): """ Lookup the station/system specified by 'name' which can be the @@ -575,34 +571,51 @@ def lookupPlace(self, name): the massive namespace of Stars and Systems, we rank the matches so that exact matches win, and only inferior close matches are looked at if no exacts are found. + + Legal annotations: + system + station + @system [explicitly a system name] + /station [explicitly a station name] + system/station + @system/station """ if isinstance(name, System) or isinstance(name, Station): return name slashPos = name.find('/') - if slashPos > 0: + nameOff = 1 if name.startswith('@') else 0 + if slashPos > nameOff: # Slash indicates it's, e.g., AULIN/ENTERPRISE - system, station = name[0:slashPos], name[slashPos+1:] - return lookupSystemAndStation(self, system, station) - - exactMatches = [] - closeMatches = [] - wordMatches = [] - candidates = [] + sysName, stnName = name[nameOff:slashPos], name[slashPos+1:] + elif slashPos == nameOff: + sysName, stnName = None, name[nameOff+1:] + elif nameOff: + # It's explicitly a station + sysName, stnName = name[nameOff:], None + else: + # It could be either, use the name for both. + sysName = stnName = name[nameOff:] - normTrans = TradeDB.normalizeTrans - trimTrans = str.maketrans('', '', ' \'') + exactMatch = [] + closeMatch = [] + wordMatch = [] + anyMatch = [] + + def lookup(name, candidates): + """ Search candidates for the given name """ - nameNorm = name.translate(normTrans) - nameTrimmed = nameNorm.translate(trimTrans) + normTrans = TradeDB.normalizeTrans + trimTrans = str.maketrans('', '', ' \'') - nameLen = len(name) - nameNormLen = len(nameNorm) - nameTrimmedLen = len(nameTrimmed) + nameNorm = name.translate(normTrans) + nameTrimmed = nameNorm.translate(trimTrans) - def consider(placeList): - """ Try the specified namespace """ - for place in placeList: + nameLen = len(name) + nameNormLen = len(nameNorm) + nameTrimmedLen = len(nameTrimmed) + + for place in candidates: placeName = place.dbname placeNameNorm = placeName.translate(normTrans) placeNameNormLen = len(placeNameNorm) @@ -614,21 +627,28 @@ def consider(placeList): # If the lengths match, do a direct comparison. if len(placeName) == nameLen: if placeNameNorm == nameNorm: - exactMatches.append(place) + exactMatch.append(place) continue if placeNameNormLen == nameNormLen: if placeNameNorm == nameNorm: - closeMatches.append(place) + closeMatch.append(place) continue if nameNormLen < placeNameNormLen: - if placeNameNorm.startswith(nameNorm): + subPos = placeNameNorm.find(nameNorm) + if subPos == 0: if placeNameNorm[nameNormLen] == ' ': - # E.g. 'aulin' vs 'aulin enterprise' - wordMatches.append(place) + # first word + wordMatch.append(place) + else: + anyMatch.append(place) + continue + elif subPos > 0: + if placeNameNorm[subPos] == ' ' and \ + placeNameNorm[subPos + nameNormLen] == ' ': + wordMatch.append(place) else: - # E.g. "Russ' vs 'Russo' - candidates.append(place) + anyMatch.append(place) continue if not placeNameNorm.startswith(nameNorm[0]): @@ -646,34 +666,53 @@ def consider(placeList): # A match here is not exact but still fairly interesting if len(placeNameTrimmed) == nameTrimmedLen: if placeNameTrimmed == nameTrimmed: - closeMatches.append(place) + closeMatch.append(place) continue - if placeNameTrimmed.startswith(nameTrimmed): - candidates.append(place) - - consider(self.systemByID.values()) - consider(self.stationByID.values()) - - if exactMatches: - if len(exactMatches) == 1: - return exactMatches[0] - elif closeMatches: - if len(closeMatches) == 1: - return closeMatches[0] - elif wordMatches: - if len(wordMatches) == 1: - return wordMatches[0] - elif candidates: - if len(candidates) == 1: - return candidates[0] - else: - # Nothing matched + if placeNameTrimmed.find(nameTrimmed) >= 0: + anyMatch.append(place) + + if sysName: + lookup(sysName, self.systemByID.values()) + if stnName: + # Are we considering the name as a station? + # (we don't if they type, e,g '@aulin') + # compare against nameOff to allow '@/station' + if slashPos > nameOff + 1: + # "sys/station"; the user should have specified a system + # name and we should be able to narrow down which + # stations we compare against. Check first if there are + # any matches. + stationCandidates = [] + for sys in itertools.chain( + exactMatch, closeMatch, wordMatch, anyMatch + ): + stationCandidates += sys.stations + # Clear out the candidate lists + exactMatch = [] + closeMatch = [] + wordMatch = [] + anyMatch = [] + else: + # Consider against all station names + stationCandidates = self.stationByID.values() + lookup(stnName, stationCandidates) + + # consult the match sets in ranking order for a single + # match, which denotes a win at that tier. For example, + # if there is one exact match, we don't care how many + # close matches there were. + for matchSet in exactMatch, closeMatch, wordMatch, anyMatch: + if len(matchSet) == 1: + return matchSet[0] + + # Nothing matched + if not any([exactMatch, closeMatch, wordMatch, anyMatch]): raise TradeException("Unrecognized place: {}".format(name)) # More than one match raise AmbiguityError( 'System/Station', name, - exactMatches + closeMatches + wordMatches + candidates, + exactMatch + closeMatch + wordMatch + anyMatch, key=lambda place: place.name()) @@ -1122,7 +1161,7 @@ class ListSearchMatch(namedtuple('Match', [ 'key', 'value' ])): normTrans = TradeDB.normalizeTrans needle = lookup.translate(normTrans) - partialMatches, wordMatches = [], [] + partialMatch, wordMatch = [], [] # make a regex to match whole words wordRe = re.compile(r'\b{}\b'.format(lookup), re.IGNORECASE) # describe a match @@ -1135,19 +1174,19 @@ class ListSearchMatch(namedtuple('Match', [ 'key', 'value' ])): return val(entry) match = ListSearchMatch(entryKey, val(entry)) if wordRe.match(entryKey): - wordMatches.append(match) + wordMatch.append(match) else: - partialMatches.append(match) + partialMatch.append(match) # Whole word matches trump partial matches - if wordMatches: - if len(wordMatches) > 1: - raise AmbiguityError(listType, lookup, wordMatches, key=lambda item: item.key) - return wordMatches[0].value + if wordMatch: + if len(wordMatch) > 1: + raise AmbiguityError(listType, lookup, wordMatch, key=lambda item: item.key) + return wordMatch[0].value # Fuzzy matches - if partialMatches: - if len(partialMatches) > 1: - raise AmbiguityError(listType, lookup, partialMatches, key=lambda item: item.key) - return partialMatches[0].value + if partialMatch: + if len(partialMatch) > 1: + raise AmbiguityError(listType, lookup, partialMatch, key=lambda item: item.key) + return partialMatch[0].value # No matches raise LookupError("Error: '%s' doesn't match any %s" % (lookup, listType))