Skip to content

Commit

Permalink
System/Station disambiguation improvements
Browse files Browse the repository at this point in the history
TradeDB.lookupPlace now supports disambiguation of system
and station names.

System or Station: aulin, asellusprim  or  beagle2
Explicit System: @Asellus
Explicit Station: /beagle or @/beagle
System/Station: primus/beag
Overkill: "@Asellus primus/beagle 2 landing"
  • Loading branch information
kfsone committed Nov 30, 2014
1 parent 3d829ac commit cc5a02d
Showing 1 changed file with 110 additions and 71 deletions.
181 changes: 110 additions & 71 deletions tradedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,27 @@ class AmbiguityError(TradeException):
Attributes:
lookupType - description of what was being queried,
searchKey - the key given to the search routine,
candidates - list of candidates
anyMatch - list of anyMatch
key - retrieve the display string for a candidate
"""
def __init__(self, lookupType, searchKey, candidates, key=lambda item:item):
def __init__(self, lookupType, searchKey, anyMatch, key=lambda item:item):
self.lookupType = lookupType
self.searchKey = searchKey
self.candidates = candidates
self.anyMatch = anyMatch
self.key = key


def __str__(self):
candidates, key = self.candidates, self.key
if len(candidates) > 10:
anyMatch, key = self.anyMatch, self.key
if len(anyMatch) > 10:
opportunities = ", ".join([
key(c) for c in candidates[:10]
key(c) for c in anyMatch[:10]
] + ["..."])
else:
opportunities = ", ".join([
key(c) for c in candidates[0:-1]
key(c) for c in anyMatch[0:-1]
])
opportunities += " or " + key(candidates[-1])
opportunities += " or " + key(anyMatch[-1])
return '{} lookup: "{}" could match {}'.format(
self.lookupType, str(self.searchKey),
opportunities
Expand Down Expand Up @@ -477,7 +477,7 @@ def lookupSystemRelaxed(self, key):
return place
except AmbiguityError as e:
# See if the ambiguity resolves down to a single system.
for candidate in e.candidates:
for candidate in e.anyMatch:
if isinstance(candidate, Station):
systems.add(candidate.system)
else:
Expand Down Expand Up @@ -559,10 +559,6 @@ def _loadStations(self):
self.tdenv.DEBUG1("Loaded {:n} Stations", len(stationByID))


def lookupSystemAndStation(self, systemName, stationName):
raise Exception("Not implemented yet")


def lookupPlace(self, name):
"""
Lookup the station/system specified by 'name' which can be the
Expand All @@ -575,34 +571,51 @@ def lookupPlace(self, name):
the massive namespace of Stars and Systems, we rank the
matches so that exact matches win, and only inferior close
matches are looked at if no exacts are found.
Legal annotations:
system
station
@system [explicitly a system name]
/station [explicitly a station name]
system/station
@system/station
"""
if isinstance(name, System) or isinstance(name, Station):
return name

slashPos = name.find('/')
if slashPos > 0:
nameOff = 1 if name.startswith('@') else 0
if slashPos > nameOff:
# Slash indicates it's, e.g., AULIN/ENTERPRISE
system, station = name[0:slashPos], name[slashPos+1:]
return lookupSystemAndStation(self, system, station)

exactMatches = []
closeMatches = []
wordMatches = []
candidates = []
sysName, stnName = name[nameOff:slashPos], name[slashPos+1:]
elif slashPos == nameOff:
sysName, stnName = None, name[nameOff+1:]
elif nameOff:
# It's explicitly a station
sysName, stnName = name[nameOff:], None
else:
# It could be either, use the name for both.
sysName = stnName = name[nameOff:]

normTrans = TradeDB.normalizeTrans
trimTrans = str.maketrans('', '', ' \'')
exactMatch = []
closeMatch = []
wordMatch = []
anyMatch = []

def lookup(name, candidates):
""" Search candidates for the given name """

nameNorm = name.translate(normTrans)
nameTrimmed = nameNorm.translate(trimTrans)
normTrans = TradeDB.normalizeTrans
trimTrans = str.maketrans('', '', ' \'')

nameLen = len(name)
nameNormLen = len(nameNorm)
nameTrimmedLen = len(nameTrimmed)
nameNorm = name.translate(normTrans)
nameTrimmed = nameNorm.translate(trimTrans)

def consider(placeList):
""" Try the specified namespace """
for place in placeList:
nameLen = len(name)
nameNormLen = len(nameNorm)
nameTrimmedLen = len(nameTrimmed)

for place in candidates:
placeName = place.dbname
placeNameNorm = placeName.translate(normTrans)
placeNameNormLen = len(placeNameNorm)
Expand All @@ -614,21 +627,28 @@ def consider(placeList):
# If the lengths match, do a direct comparison.
if len(placeName) == nameLen:
if placeNameNorm == nameNorm:
exactMatches.append(place)
exactMatch.append(place)
continue
if placeNameNormLen == nameNormLen:
if placeNameNorm == nameNorm:
closeMatches.append(place)
closeMatch.append(place)
continue

if nameNormLen < placeNameNormLen:
if placeNameNorm.startswith(nameNorm):
subPos = placeNameNorm.find(nameNorm)
if subPos == 0:
if placeNameNorm[nameNormLen] == ' ':
# E.g. 'aulin' vs 'aulin enterprise'
wordMatches.append(place)
# first word
wordMatch.append(place)
else:
anyMatch.append(place)
continue
elif subPos > 0:
if placeNameNorm[subPos] == ' ' and \
placeNameNorm[subPos + nameNormLen] == ' ':
wordMatch.append(place)
else:
# E.g. "Russ' vs 'Russo'
candidates.append(place)
anyMatch.append(place)
continue

if not placeNameNorm.startswith(nameNorm[0]):
Expand All @@ -646,34 +666,53 @@ def consider(placeList):
# A match here is not exact but still fairly interesting
if len(placeNameTrimmed) == nameTrimmedLen:
if placeNameTrimmed == nameTrimmed:
closeMatches.append(place)
closeMatch.append(place)
continue
if placeNameTrimmed.startswith(nameTrimmed):
candidates.append(place)

consider(self.systemByID.values())
consider(self.stationByID.values())

if exactMatches:
if len(exactMatches) == 1:
return exactMatches[0]
elif closeMatches:
if len(closeMatches) == 1:
return closeMatches[0]
elif wordMatches:
if len(wordMatches) == 1:
return wordMatches[0]
elif candidates:
if len(candidates) == 1:
return candidates[0]
else:
# Nothing matched
if placeNameTrimmed.find(nameTrimmed) >= 0:
anyMatch.append(place)

if sysName:
lookup(sysName, self.systemByID.values())
if stnName:
# Are we considering the name as a station?
# (we don't if they type, e,g '@aulin')
# compare against nameOff to allow '@/station'
if slashPos > nameOff + 1:
# "sys/station"; the user should have specified a system
# name and we should be able to narrow down which
# stations we compare against. Check first if there are
# any matches.
stationCandidates = []
for sys in itertools.chain(
exactMatch, closeMatch, wordMatch, anyMatch
):
stationCandidates += sys.stations
# Clear out the candidate lists
exactMatch = []
closeMatch = []
wordMatch = []
anyMatch = []
else:
# Consider against all station names
stationCandidates = self.stationByID.values()
lookup(stnName, stationCandidates)

# consult the match sets in ranking order for a single
# match, which denotes a win at that tier. For example,
# if there is one exact match, we don't care how many
# close matches there were.
for matchSet in exactMatch, closeMatch, wordMatch, anyMatch:
if len(matchSet) == 1:
return matchSet[0]

# Nothing matched
if not any([exactMatch, closeMatch, wordMatch, anyMatch]):
raise TradeException("Unrecognized place: {}".format(name))

# More than one match
raise AmbiguityError(
'System/Station', name,
exactMatches + closeMatches + wordMatches + candidates,
exactMatch + closeMatch + wordMatch + anyMatch,
key=lambda place: place.name())


Expand Down Expand Up @@ -1122,7 +1161,7 @@ class ListSearchMatch(namedtuple('Match', [ 'key', 'value' ])):

normTrans = TradeDB.normalizeTrans
needle = lookup.translate(normTrans)
partialMatches, wordMatches = [], []
partialMatch, wordMatch = [], []
# make a regex to match whole words
wordRe = re.compile(r'\b{}\b'.format(lookup), re.IGNORECASE)
# describe a match
Expand All @@ -1135,19 +1174,19 @@ class ListSearchMatch(namedtuple('Match', [ 'key', 'value' ])):
return val(entry)
match = ListSearchMatch(entryKey, val(entry))
if wordRe.match(entryKey):
wordMatches.append(match)
wordMatch.append(match)
else:
partialMatches.append(match)
partialMatch.append(match)
# Whole word matches trump partial matches
if wordMatches:
if len(wordMatches) > 1:
raise AmbiguityError(listType, lookup, wordMatches, key=lambda item: item.key)
return wordMatches[0].value
if wordMatch:
if len(wordMatch) > 1:
raise AmbiguityError(listType, lookup, wordMatch, key=lambda item: item.key)
return wordMatch[0].value
# Fuzzy matches
if partialMatches:
if len(partialMatches) > 1:
raise AmbiguityError(listType, lookup, partialMatches, key=lambda item: item.key)
return partialMatches[0].value
if partialMatch:
if len(partialMatch) > 1:
raise AmbiguityError(listType, lookup, partialMatch, key=lambda item: item.key)
return partialMatch[0].value
# No matches
raise LookupError("Error: '%s' doesn't match any %s" % (lookup, listType))

Expand Down

0 comments on commit cc5a02d

Please sign in to comment.