Skip to content

Commit

Permalink
Merged in bgol/tradedangerous/devel (pull request #27)
Browse files Browse the repository at this point in the history
CSV import/export update
  • Loading branch information
kfsone committed Nov 29, 2014
2 parents 6e0ae9e + e9a5e05 commit 24deffc
Show file tree
Hide file tree
Showing 11 changed files with 2,143 additions and 2,048 deletions.
3 changes: 3 additions & 0 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,9 @@ EXPORT sub-command:
-T TABLE[,TABLE,...]
Specify a comma separated list of tablenames to export.

--delete-empty
Delete CSV files without content.

Examples:
> trade.py export --path misc
Using database './data/TradeDangerous.db'
Expand Down
33 changes: 24 additions & 9 deletions cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,10 +626,11 @@ def processImportFile(tdenv, db, importPath, tableName):
fkeySelectStr = ("("
"SELECT {newValue}"
" FROM {table}"
" WHERE {table}.{column} = ?"
" WHERE {stmt}"
")"
)
uniquePfx = "unq:"
ignorePfx = "!"

with importPath.open(encoding='utf-8') as importFile:
csvin = csv.reader(importFile, delimiter=',', quotechar="'", doublequote=True)
Expand All @@ -639,9 +640,9 @@ def processImportFile(tdenv, db, importPath, tableName):

# split up columns and values
# this is necessary because the insert might use a foreign key
columnNames = []
bindColumns = []
bindValues = []
joinHelper = []
uniqueIndexes = []
for (cIndex, cName) in enumerate(columnDefs):
splitNames = cName.split('@')
Expand All @@ -650,23 +651,34 @@ def processImportFile(tdenv, db, importPath, tableName):
if colName.startswith(uniquePfx):
uniqueIndexes += [ cIndex ]
colName = colName[len(uniquePfx):]
columnNames.append(colName)
if colName.startswith(ignorePfx):
# this column is only used to resolve an FK
colName = colName[len(ignorePfx):]
joinHelper.append( "{}@{}".format(colName, splitNames[1]) )
continue

if len(splitNames) == 1:
# no foreign key, straight insert
bindColumns.append(colName)
bindValues.append('?')
else:
# foreign key, we need to make a select
splitJoin = splitNames[1].split('.')
joinTable = splitJoin[0]
joinColumn = splitJoin[1]
bindColumns.append(joinColumn)
splitJoin = splitNames[1].split('.')
joinTable = [ splitJoin[0] ]
joinStmt = []
for joinRow in joinHelper:
helperNames = joinRow.split('@')
helperJoin = helperNames[1].split('.')
joinTable.append( "INNER JOIN {} USING({})".format(helperJoin[0], helperJoin[1]) )
joinStmt.append( "{}.{} = ?".format(helperJoin[0], helperNames[0]) )
joinHelper = []
joinStmt.append("{}.{} = ?".format(splitJoin[0], colName))
bindColumns.append(splitJoin[1])
bindValues.append(
fkeySelectStr.format(
newValue=splitNames[1],
table=joinTable,
column=colName,
table=" ".join(joinTable),
stmt=" AND ".join(joinStmt),
)
)
# now we can make the sql statement
Expand Down Expand Up @@ -733,6 +745,9 @@ def processImportFile(tdenv, db, importPath, tableName):
)
) from None
importCount += 1
else:
if not tdenv.quiet:
print("Wrong number of columns ({}:{}): {}".format(importPath, lineNo, ', '.join(linein)))
db.commit()
tdenv.DEBUG0("{count} {table}s imported",
count=importCount,
Expand Down
162 changes: 120 additions & 42 deletions commands/export_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
#
# Generate the CSV files for the master data of the database.
#
# Note: This script makes some assumptions about the structure
# of the database:
# * The column name of an foreign key reference must be the same
# * The referenced table must have a column named "name"
# which is UNIQUE
# * One column primary keys will be handled by the database engine
# Note: This command makes some assumptions about the structure
# of the database:
# * The table should only have one UNIQUE index
# * The referenced table must have one UNIQUE index
# * The FK columns must have the same name in both tables
# * One column primary keys will be handled by the database engine
#
######################################################################
# CAUTION: If the database structure gets changed this script might
Expand Down Expand Up @@ -68,14 +68,68 @@
type=str,
default=None
),
ParseArgument('--delete-empty',
help='Delete CSV files without content.',
dest='deleteEmpty',
action='store_true',
default=False
),
]

######################################################################
# Helpers

def search_dict(list, key, val):
def search_keyList(list, val):
for row in list:
if row[key] == val: return row
if row['from'] == row['to'] == val: return row

def getUniqueIndex(conn, tableName):
    """Return the column names of the first UNIQUE index on tableName.

    Only the first unique index reported by SQLite is considered; an
    empty list is returned when the table has no unique index at all.
    Requires conn.row_factory to yield rows addressable by column name.
    """
    listCursor = conn.cursor()
    for indexRow in listCursor.execute("PRAGMA index_list('%s')" % tableName):
        if not indexRow['unique']:
            continue
        # first unique index wins: collect its columns and stop looking
        infoCursor = conn.cursor()
        return [
            infoRow['name']
            for infoRow in infoCursor.execute(
                "PRAGMA index_info('%s')" % indexRow['name']
            )
        ]
    return []

def getFKeyList(conn, tableName):
    """Return all single-column foreign keys of tableName.

    Each entry is a dict {'table': referenced table,
    'from': local column, 'to': referenced column}.
    Compound (multi-column) foreign keys are skipped entirely.
    Requires conn.row_factory to yield rows addressable by column name.
    """
    keyList = []
    keyCursor = conn.cursor()
    # PRAGMA foreign_key_list emits the columns of each FK as consecutive
    # rows ordered by 'seq', so a seq==1 row always directly follows the
    # seq==0 row of the same (compound) foreign key.
    for keyRow in keyCursor.execute("PRAGMA foreign_key_list('%s')" % tableName):
        if keyRow['seq'] == 0:
            # first column of a (possibly compound) foreign key
            keyList.append({
                'table': keyRow['table'],
                'from': keyRow['from'],
                'to': keyRow['to'],
            })
        elif keyRow['seq'] == 1:
            # second column seen: compound FK, drop the entry just added.
            # pop() is positional; the original remove-by-value could
            # delete an earlier, identical-looking entry instead.
            keyList.pop()
    return keyList

def buildFKeyStmt(conn, tableName, key):
    """Recursively resolve the foreign key `key` of tableName into join
    descriptors for the CSV exporter.

    Walks the unique index of the referenced table; any unique column
    that is itself a foreign key is followed one level deeper, every
    other unique column produces a descriptor dict with the keys
    'table', 'column', 'joinTable' and 'joinColumn'.
    """
    refTable = key['table']
    fkList = getFKeyList(conn, refTable)
    stmtList = []
    for colName in getUniqueIndex(conn, refTable):
        # is this unique column itself a foreign key?
        nestedKey = search_keyList(fkList, colName)
        if nestedKey:
            # yes: recurse into the table it references
            stmtList.extend(buildFKeyStmt(conn, refTable, nestedKey))
        else:
            stmtList.append({
                'table': tableName,
                'column': colName,
                'joinTable': refTable,
                'joinColumn': key['to'],
            })
    return stmtList

######################################################################
# Perform query and populate result set
Expand Down Expand Up @@ -108,6 +162,10 @@ def run(results, cmdenv, tdb):
bindValues = []
tableStmt = ''

# prefix for unique/ignore columns
uniquePfx = "unq:"
ignorePfx = "!"

tableCursor = conn.cursor()
for row in tableCursor.execute("""
SELECT name
Expand All @@ -130,76 +188,96 @@ def run(results, cmdenv, tdb):
if not cmdenv.quiet:
print("Export Table '{table}' to '{file}'".format(table=tableName, file=exportName))

lineCount = 0
with exportName.open("w", encoding='utf-8', newline="\n") as exportFile:
exportOut = csv.writer(exportFile, delimiter=",", quotechar="'", doublequote=True, quoting=csv.QUOTE_NONNUMERIC, lineterminator="\n")

cur = conn.cursor()
keyList = []
for key in cur.execute("PRAGMA foreign_key_list('%s')" % tableName):
# ignore FKs to table StationItem
if key['table'] != 'StationItem':
# only support FK joins with the same column name
if key['from'] == key['to']:
keyList += [ {'table': key['table'], 'column': key['from']} ]

# check for single PRIMARY KEY
pkCount = 0
for col in cur.execute("PRAGMA table_info('%s')" % tableName):
for columnRow in cur.execute("PRAGMA table_info('%s')" % tableName):
# count the columns of the primary key
if col['pk'] > 0: pkCount += 1
if columnRow['pk'] > 0: pkCount += 1

# build column list
columnList = []
for columnRow in cur.execute("PRAGMA table_info('%s')" % tableName):
# if there is only one PK column, ignore it
if columnRow['pk'] > 0 and pkCount == 1: continue
columnList.append(columnRow)

# reverse the first two columns for some tables
if tableName in reverseList:
columnList[0], columnList[1] = columnList[1], columnList[0]

# initialize helper lists
csvHead = []
stmtColumn = []
stmtTable = [ tableName ]
stmtOrder = []
unqIndex = getUniqueIndex(conn, tableName)
keyList = getFKeyList(conn, tableName)

# iterate over all columns of the table
for col in cur.execute("PRAGMA table_info('%s')" % tableName):
# if there is only one PK column, ignore it
if col['pk'] > 0 and pkCount == 1: continue
cmdenv.DEBUG0('UNIQUE: ' + ", ".join(unqIndex))

# iterate over all columns of the table
for col in columnList:
# check if the column is a foreign key
key = search_dict(keyList, 'column', col['name'])
key = search_keyList(keyList, col['name'])
if key:
# there must be a "name" column in the referenced table
csvHead += [ "name@{}.{}".format(key['table'], key['column']) ]
stmtColumn += [ "{}.name".format(key['table']) ]
if col['notnull']:
stmtTable += [ 'INNER JOIN {} USING({})'.format(key['table'], key['column']) ]
else:
stmtTable += [ 'LEFT OUTER JOIN {} USING({})'.format(key['table'], key['column']) ]
stmtOrder += [ "{}.name".format(key['table']) ]
# make the join statement
keyStmt = buildFKeyStmt(conn, tableName, key)
for keyRow in keyStmt:
if cmdenv.debug > 0:
print('FK-Stmt: {}'.format(keyRow))
# is the join for the same table
if keyRow['table'] == tableName:
csvPfx = ''
joinStmt = 'USING({})'.format(keyRow['joinColumn'])
else:
# this column must be ignored by the importer, it's only
# used to resolve the FK relation
csvPfx = ignorePfx
joinStmt = 'ON {}.{} = {}.{}'.format(keyRow['table'], keyRow['joinColumn'], keyRow['joinTable'], keyRow['joinColumn'])
if col['name'] in unqIndex:
# column is part of an unique index
csvPfx = uniquePfx + csvPfx
csvHead += [ "{}{}@{}.{}".format(csvPfx, keyRow['column'], keyRow['joinTable'], keyRow['joinColumn']) ]
stmtColumn += [ "{}.{}".format(keyRow['joinTable'], keyRow['column']) ]
if col['notnull']:
stmtTable += [ 'INNER JOIN {} {}'.format(keyRow['joinTable'], joinStmt) ]
else:
stmtTable += [ 'LEFT OUTER JOIN {} {}'.format(keyRow['joinTable'], joinStmt) ]
stmtOrder += [ "{}.{}".format(keyRow['joinTable'], keyRow['column']) ]
else:
# ordinary column
if col['name'] == 'name':
# name columns must be unique
csvHead += [ 'unq:name' ]
if col['name'] in unqIndex:
# column is part of an unique index
csvHead += [ uniquePfx + col['name'] ]
stmtOrder += [ "{}.{}".format(tableName, col['name']) ]
else:
csvHead += [ col['name'] ]
stmtColumn += [ "{}.{}".format(tableName, col['name']) ]

# reverse the first two columns for some tables
if tableName in reverseList:
csvHead[0], csvHead[1] = csvHead[1], csvHead[0]
stmtColumn[0], stmtColumn[1] = stmtColumn[1], stmtColumn[0]
if len(stmtOrder) > 1:
stmtOrder[0], stmtOrder[1] = stmtOrder[1], stmtOrder[0]

# build the SQL statement
sqlStmt = "SELECT {} FROM {}".format(",".join(stmtColumn)," ".join(stmtTable))
if len(stmtOrder) > 0:
sqlStmt += " ORDER BY {}".format(",".join(stmtOrder))
cmdenv.DEBUG0("SQL: %s" % sqlStmt)

# finally generate the csv file
lineCount = 0
# no quotes for header line
# write header line without quotes
exportFile.write("{}\n".format(",".join(csvHead)))
for line in cur.execute(sqlStmt):
lineCount += 1
cmdenv.DEBUG2("{count}: {values}".format(count=lineCount, values=list(line)))
exportOut.writerow(list(line))
cmdenv.DEBUG1("{count} {table}s exported".format(count=lineCount, table=tableName))
if cmdenv.deleteEmpty and lineCount == 0:
# delete file if empty
exportName.unlink()
if not cmdenv.quiet:
print("Delete empty file {file}'".format(file=exportName))

return None
2 changes: 1 addition & 1 deletion data/Added.csv
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ unq:name
'Beta3'
'Beta3-Inferred'
'Beyond The Pill (unverified)-Inferred'
'Gamma1'
'Gamma-Inferred'
'Gamma1'
'Gamma1-Inferred'
'Not Present'
'Premium Beta1'
Expand Down
20 changes: 10 additions & 10 deletions data/AltItemNames.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
unq:[email protected]_id,unq:alt_name
'Agri-Medicines','agriculturalmedicines'
'Consumer Tech','consumertechnology'
'Dom. Appliances','domesticappliances'
'H.E. Suits','hazardousenvironmentsuits'
'Microbial Furnaces','heliostaticfurnaces'
'Narcotics','basicnarcotics'
'Non-Lethal Wpns','nonlethalweapons'
'Reactive Armour','reactivearmour'
'Land Enrichment Systems','terrainenrichmentsystems'
unq:[email protected]_id,unq:[email protected]_id,unq:alt_name
'Consumer Items','Consumer Tech','consumertechnology'
'Consumer Items','Dom. Appliances','domesticappliances'
'Legal Drugs','Narcotics','basicnarcotics'
'Machinery','Microbial Furnaces','heliostaticfurnaces'
'Medicines','Agri-Medicines','agriculturalmedicines'
'Technology','H.E. Suits','hazardousenvironmentsuits'
'Technology','Land Enrichment Systems','terrainenrichmentsystems'
'Weapons','Non-Lethal Wpns','nonlethalweapons'
'Weapons','Reactive Armour','reactivearmour'
2 changes: 1 addition & 1 deletion data/Category.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name
unq:name
'Chemicals'
'Consumer Items'
'Foods'
Expand Down
12 changes: 6 additions & 6 deletions data/Item.csv
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
[email protected]_id,unq:name
unq:[email protected]_id,unq:name
'Chemicals','Explosives'
'Chemicals','Hydrogen Fuel'
'Chemicals','Mineral Oil'
'Chemicals','Pesticides'
'Consumer Items','Clothing'
'Consumer Items','Consumer Tech'
'Consumer Items','Dom. Appliances'
'Legal Drugs','Beer'
'Legal Drugs','Liquor'
'Legal Drugs','Narcotics'
'Legal Drugs','Tobacco'
'Legal Drugs','Wine'
'Foods','Algae'
'Foods','Animal Meat'
'Foods','Coffee'
Expand All @@ -25,6 +20,11 @@ [email protected]_id,unq:name
'Industrial Materials','Polymers'
'Industrial Materials','Semiconductors'
'Industrial Materials','Superconductors'
'Legal Drugs','Beer'
'Legal Drugs','Liquor'
'Legal Drugs','Narcotics'
'Legal Drugs','Tobacco'
'Legal Drugs','Wine'
'Machinery','Atmospheric Processors'
'Machinery','Crop Harvesters'
'Machinery','Marine Equipment'
Expand Down
10 changes: 5 additions & 5 deletions data/Ship.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name,capacity,mass,drive_rating,max_ly_empty,max_ly_full,max_speed,boost_speed
unq:name,capacity,mass,drive_rating,max_ly_empty,max_ly_full,max_speed,boost_speed
'Anaconda',228,2600,52345.0,19.7,17.6,180,235
'Cobra',36,114,1155.0,9.94,7.3,280,400
'Eagle',6,52,348.0,6.59,6.0,240,350
'Sidewinder',4,47,348.0,8.13,7.25,220,293
'Hauler',16,39,348.0,8.74,6.1,200,246
'Viper',8,40,348.0,13.49,9.16,320,500
'Cobra',36,114,1155.0,9.94,7.3,280,400
'Lakon Type 6',100,113,3455.0,29.36,15.64,220,329
'Lakon Type 9',440,1275,23720.0,18.22,13.34,130,200
'Anaconda',228,2600,52345.0,19.7,17.6,180,235
'Sidewinder',4,47,348.0,8.13,7.25,220,293
'Viper',8,40,348.0,13.49,9.16,320,500
Loading

0 comments on commit 24deffc

Please sign in to comment.