From cc5a02df7d00b5d752b814d34df0ea3dcbf6a4b3 Mon Sep 17 00:00:00 2001
From: Oliver Smith <oliver@kfs.org>
Date: Sun, 30 Nov 2014 13:44:19 -0800
Subject: [PATCH] System/Station disambiguation improvements

TradeDB.lookupPlace now supports disambiguation of system
and station names.

System or Station: aulin, asellusprim  or  beagle2
Explicit System: @asellus
Explicit Station: /beagle or @/beagle
System/Station: primus/beag
Overkill: "@asellus primus/beagle 2 landing"
---
 tradedb.py | 181 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 110 insertions(+), 71 deletions(-)

diff --git a/tradedb.py b/tradedb.py
index 7300bd9e..7ec8d06d 100644
--- a/tradedb.py
+++ b/tradedb.py
@@ -36,27 +36,27 @@ class AmbiguityError(TradeException):
         Attributes:
             lookupType - description of what was being queried,
             searchKey  - the key given to the search routine,
-            candidates - list of candidates
+            anyMatch   - list of candidate matches
             key        - retrieve the display string for a candidate
     """
-    def __init__(self, lookupType, searchKey, candidates, key=lambda item:item):
+    def __init__(self, lookupType, searchKey, anyMatch, key=lambda item:item):
         self.lookupType = lookupType
         self.searchKey = searchKey
-        self.candidates = candidates
+        self.anyMatch = anyMatch
         self.key = key
 
 
     def __str__(self):
-        candidates, key = self.candidates, self.key
-        if len(candidates) > 10:
+        anyMatch, key = self.anyMatch, self.key
+        if len(anyMatch) > 10:
             opportunities = ", ".join([
-                        key(c) for c in candidates[:10]
+                        key(c) for c in anyMatch[:10]
                     ] + ["..."])
         else:
             opportunities = ", ".join([
-                        key(c) for c in candidates[0:-1]
+                        key(c) for c in anyMatch[0:-1]
                     ])
-            opportunities += " or " + key(candidates[-1])
+            opportunities += " or " + key(anyMatch[-1])
         return '{} lookup: "{}" could match {}'.format(
                         self.lookupType, str(self.searchKey),
                         opportunities
@@ -477,7 +477,7 @@ def lookupSystemRelaxed(self, key):
                 return place
         except AmbiguityError as e:
             # See if the ambiguity resolves down to a single system.
-            for candidate in e.candidates:
+            for candidate in e.anyMatch:
                 if isinstance(candidate, Station):
                     systems.add(candidate.system)
                 else:
@@ -559,10 +559,6 @@ def _loadStations(self):
         self.tdenv.DEBUG1("Loaded {:n} Stations", len(stationByID))
 
 
-    def lookupSystemAndStation(self, systemName, stationName):
-        raise Exception("Not implemented yet")
-
-
     def lookupPlace(self, name):
         """
             Lookup the station/system specified by 'name' which can be the
@@ -575,34 +571,51 @@ def lookupPlace(self, name):
             the massive namespace of Stars and Systems, we rank the
             matches so that exact matches win, and only inferior close
             matches are looked at if no exacts are found.
+
+            Legal annotations:
+                system
+                station
+                @system    [explicitly a system name]
+                /station   [explicitly a station name]
+                system/station
+                @system/station
         """
         if isinstance(name, System) or isinstance(name, Station):
             return name
 
         slashPos = name.find('/')
-        if slashPos > 0:
+        nameOff = 1 if name.startswith('@') else 0
+        if slashPos > nameOff:
             # Slash indicates it's, e.g., AULIN/ENTERPRISE
-            system, station = name[0:slashPos], name[slashPos+1:]
-            return lookupSystemAndStation(self, system, station)
-        
-        exactMatches = []
-        closeMatches = []
-        wordMatches = []
-        candidates = []
+            sysName, stnName = name[nameOff:slashPos], name[slashPos+1:]
+        elif slashPos == nameOff:
+            sysName, stnName = None, name[nameOff+1:]
+        elif nameOff:
+            # It's explicitly a system
+            sysName, stnName = name[nameOff:], None
+        else:
+            # It could be either, use the name for both.
+            sysName = stnName = name[nameOff:]
 
-        normTrans = TradeDB.normalizeTrans
-        trimTrans = str.maketrans('', '', ' \'')
+        exactMatch = []
+        closeMatch = []
+        wordMatch = []
+        anyMatch = []
+
+        def lookup(name, candidates):
+            """ Search candidates for the given name """
 
-        nameNorm = name.translate(normTrans)
-        nameTrimmed = nameNorm.translate(trimTrans)
+            normTrans = TradeDB.normalizeTrans
+            trimTrans = str.maketrans('', '', ' \'')
 
-        nameLen = len(name)
-        nameNormLen = len(nameNorm)
-        nameTrimmedLen = len(nameTrimmed)
+            nameNorm = name.translate(normTrans)
+            nameTrimmed = nameNorm.translate(trimTrans)
 
-        def consider(placeList):
-            """ Try the specified namespace """
-            for place in placeList:
+            nameLen = len(name)
+            nameNormLen = len(nameNorm)
+            nameTrimmedLen = len(nameTrimmed)
+
+            for place in candidates:
                 placeName = place.dbname
                 placeNameNorm = placeName.translate(normTrans)
                 placeNameNormLen = len(placeNameNorm)
@@ -614,21 +627,28 @@ def consider(placeList):
                 # If the lengths match, do a direct comparison.
                 if len(placeName) == nameLen:
                     if placeNameNorm == nameNorm:
-                        exactMatches.append(place)
+                        exactMatch.append(place)
                     continue
                 if placeNameNormLen == nameNormLen:
                     if placeNameNorm == nameNorm:
-                        closeMatches.append(place)
+                        closeMatch.append(place)
                     continue
 
                 if nameNormLen < placeNameNormLen:
-                    if placeNameNorm.startswith(nameNorm):
+                    subPos = placeNameNorm.find(nameNorm)
+                    if subPos == 0:
                         if placeNameNorm[nameNormLen] == ' ':
-                            # E.g. 'aulin' vs 'aulin enterprise'
-                            wordMatches.append(place)
+                            # first word
+                            wordMatch.append(place)
+                        else:
+                            anyMatch.append(place)
+                        continue
+                    elif subPos > 0:
+                        if placeNameNorm[subPos] == ' ' and \
+                                placeNameNorm[subPos + nameNormLen] == ' ':
+                            wordMatch.append(place)
                         else:
-                            # E.g. "Russ' vs 'Russo'
-                            candidates.append(place)
+                            anyMatch.append(place)
                         continue
 
                 if not placeNameNorm.startswith(nameNorm[0]):
@@ -646,34 +666,53 @@ def consider(placeList):
                 # A match here is not exact but still fairly interesting
                 if len(placeNameTrimmed) == nameTrimmedLen:
                     if placeNameTrimmed == nameTrimmed:
-                        closeMatches.append(place)
+                        closeMatch.append(place)
                     continue
-                if placeNameTrimmed.startswith(nameTrimmed):
-                    candidates.append(place)
-
-        consider(self.systemByID.values())
-        consider(self.stationByID.values())
-
-        if exactMatches:
-            if len(exactMatches) == 1:
-                return exactMatches[0]
-        elif closeMatches:
-            if len(closeMatches) == 1:
-                return closeMatches[0]
-        elif wordMatches:
-            if len(wordMatches) == 1:
-                return wordMatches[0]
-        elif candidates:
-            if len(candidates) == 1:
-                return candidates[0]
-        else:
-            # Nothing matched
+                if placeNameTrimmed.find(nameTrimmed) >= 0:
+                    anyMatch.append(place)
+
+        if sysName:
+            lookup(sysName, self.systemByID.values())
+        if stnName:
+            # Are we considering the name as a station?
+            # (we don't if they type, e.g., '@aulin')
+            # compare against nameOff to allow '@/station'
+            if slashPos > nameOff + 1:
+                # "sys/station"; the user should have specified a system
+                # name and we should be able to narrow down which
+                # stations we compare against. Check first if there are
+                # any matches.
+                stationCandidates = []
+                for sys in itertools.chain(
+                        exactMatch, closeMatch, wordMatch, anyMatch
+                        ):
+                    stationCandidates += sys.stations
+                # Clear out the candidate lists
+                exactMatch = []
+                closeMatch = []
+                wordMatch = []
+                anyMatch = []
+            else:
+                # Consider against all station names
+                stationCandidates = self.stationByID.values()
+            lookup(stnName, stationCandidates)
+
+        # consult the match sets in ranking order for a single
+        # match, which denotes a win at that tier. For example,
+        # if there is one exact match, we don't care how many
+        # close matches there were.
+        for matchSet in exactMatch, closeMatch, wordMatch, anyMatch:
+            if len(matchSet) == 1:
+                return matchSet[0]
+
+        # Nothing matched
+        if not any([exactMatch, closeMatch, wordMatch, anyMatch]):
             raise TradeException("Unrecognized place: {}".format(name))
     
         # More than one match
         raise AmbiguityError(
                     'System/Station', name,
-                    exactMatches + closeMatches + wordMatches + candidates,
+                    exactMatch + closeMatch + wordMatch + anyMatch,
                     key=lambda place: place.name())
 
 
@@ -1122,7 +1161,7 @@ class ListSearchMatch(namedtuple('Match', [ 'key', 'value' ])):
 
         normTrans = TradeDB.normalizeTrans
         needle = lookup.translate(normTrans)
-        partialMatches, wordMatches = [], []
+        partialMatch, wordMatch = [], []
         # make a regex to match whole words
         wordRe = re.compile(r'\b{}\b'.format(lookup), re.IGNORECASE)
         # describe a match
@@ -1135,19 +1174,19 @@ class ListSearchMatch(namedtuple('Match', [ 'key', 'value' ])):
                     return val(entry)
                 match = ListSearchMatch(entryKey, val(entry))
                 if wordRe.match(entryKey):
-                    wordMatches.append(match)
+                    wordMatch.append(match)
                 else:
-                    partialMatches.append(match)
+                    partialMatch.append(match)
         # Whole word matches trump partial matches
-        if wordMatches:
-            if len(wordMatches) > 1:
-                raise AmbiguityError(listType, lookup, wordMatches, key=lambda item: item.key)
-            return wordMatches[0].value
+        if wordMatch:
+            if len(wordMatch) > 1:
+                raise AmbiguityError(listType, lookup, wordMatch, key=lambda item: item.key)
+            return wordMatch[0].value
         # Fuzzy matches
-        if partialMatches:
-            if len(partialMatches) > 1:
-                raise AmbiguityError(listType, lookup, partialMatches, key=lambda item: item.key)
-            return partialMatches[0].value
+        if partialMatch:
+            if len(partialMatch) > 1:
+                raise AmbiguityError(listType, lookup, partialMatch, key=lambda item: item.key)
+            return partialMatch[0].value
         # No matches
         raise LookupError("Error: '%s' doesn't match any %s" % (lookup, listType))