Skip to content

Commit dccbf65

Browse files
committed
New -pk/--primary-key options, closes #22
1 parent bb71bab commit dccbf65

File tree

4 files changed

+39
-4
lines changed

4 files changed

+39
-4
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,7 @@ They will be populated with IDs that reference the new derived tables.
133133
datetimes
134134
-df, --datetime-format TEXT One or more custom date format strings to try
135135
when parsing dates/datetimes
136+
-pk, --primary-key TEXT One or more columns to use as the primary key
136137
-f, --fts TEXT One or more columns to use to populate a full-
137138
text index
138139
-i, --index TEXT Add index on this column (or a compound index

csvs_to_sqlite/cli.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,9 @@
5353
@click.option('--datetime-format', '-df', multiple=True, help=(
5454
"One or more custom date format strings to try when parsing dates/datetimes"
5555
))
56+
@click.option('--primary-key', '-pk', multiple=True, help=(
57+
"One or more columns to use as the primary key"
58+
))
5659
@click.option('--fts', '-f', multiple=True, help=(
5760
"One or more columns to use to populate a full-text index"
5861
))
@@ -76,6 +79,7 @@ def cli(
7679
date,
7780
datetime,
7881
datetime_format,
82+
primary_key,
7983
fts,
8084
index,
8185
shape,
@@ -155,6 +159,7 @@ def cli(
155159
else:
156160
to_sql_with_foreign_keys(
157161
conn, df, df.table_name, foreign_keys, sql_type_overrides,
162+
primary_keys=primary_key,
158163
index_fks=not no_index_fks
159164
)
160165
created_tables[df.table_name] = df

csvs_to_sqlite/utils.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ def drop_table(conn, table):
194194
conn.execute('DROP TABLE [{}]'.format(table))
195195

196196

197-
def get_create_table_sql(table_name, df, index=True, sql_type_overrides=None, **extra_args):
197+
def get_create_table_sql(table_name, df, index=True, sql_type_overrides=None, primary_keys=None):
198198
# Create a temporary table with just the first row
199199
# We do this in memory because we just want to get the
200200
# CREATE TABLE statement
@@ -226,7 +226,7 @@ def get_create_table_sql(table_name, df, index=True, sql_type_overrides=None, **
226226
# Everything was NaN or an integer-float - switch type:
227227
sql_type_overrides[column] = 'INTEGER'
228228

229-
df[:1].to_sql(table_name, conn, index=index, dtype=sql_type_overrides, **extra_args)
229+
df[:1].to_sql(table_name, conn, index=index, dtype=sql_type_overrides)
230230
sql = conn.execute(
231231
'select sql from sqlite_master where name = ?', [table_name]
232232
).fetchone()[0]
@@ -235,11 +235,17 @@ def get_create_table_sql(table_name, df, index=True, sql_type_overrides=None, **
235235
'PRAGMA table_info([{}])'.format(table_name)
236236
)
237237
]
238+
if primary_keys:
239+
# Rewrite SQL to add PRIMARY KEY (col1, col2) at end
240+
assert sql[-1] == ')'
241+
sql = sql[:-1] + ' ,PRIMARY KEY ({cols})\n)'.format(
242+
cols=', '.join('[{}]'.format(col) for col in primary_keys)
243+
)
238244
return sql, columns
239245

240246

241-
def to_sql_with_foreign_keys(conn, df, name, foreign_keys, sql_type_overrides=None, index_fks=False):
242-
create_sql, columns = get_create_table_sql(name, df, index=False, sql_type_overrides=sql_type_overrides)
247+
def to_sql_with_foreign_keys(conn, df, name, foreign_keys, sql_type_overrides=None, primary_keys=None, index_fks=False):
248+
create_sql, columns = get_create_table_sql(name, df, index=False, primary_keys=primary_keys, sql_type_overrides=sql_type_overrides)
243249
foreign_key_bits = []
244250
index_bits = []
245251
for column, (table, value_column) in foreign_keys.items():

tests/test_csvs_to_sqlite.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,10 @@
2323
CSV_DATES_CUSTOM_FORMAT = '''headline,date
2424
Custom format,03/02/01'''
2525

26+
CSV_CUSTOM_PRIMARY_KEYS = '''pk1,pk2,name
27+
one,one,11
28+
one,two,12
29+
two,one,21'''
2630

2731
def test_flat():
2832
runner = CliRunner()
@@ -430,3 +434,22 @@ def test_extract_cols_no_fts():
430434
where type='table' and name like '%_fts'
431435
and sql like '%USING FTS%'
432436
''').fetchall()
437+
438+
439+
def test_custom_primary_keys():
440+
runner = CliRunner()
441+
with runner.isolated_filesystem():
442+
open('pks.csv', 'w').write(CSV_CUSTOM_PRIMARY_KEYS)
443+
result = runner.invoke(
444+
cli.cli, (
445+
'pks.csv pks.db -pk pk1 --primary-key pk2'
446+
).split()
447+
)
448+
assert result.exit_code == 0
449+
conn = sqlite3.connect('pks.db')
450+
pks = [
451+
r[1]
452+
for r in conn.execute('PRAGMA table_info("pks")').fetchall()
453+
if r[-1]
454+
]
455+
assert ['pk1', 'pk2'] == pks

0 commit comments

Comments
 (0)