@@ -162,7 +162,7 @@ def create_schema_if_not_exists(uri: str, schema_name: str, comment: str | None
162
162
logger .exception (f"Error creating schema '{ schema_name } ' in DB '{ db_name } '" )
163
163
164
164
165
- def set_column_as_primary_key (
165
+ def make_int_column_primary_key_identity (
166
166
uri : str ,
167
167
* ,
168
168
schema_name : str = "public" ,
@@ -173,6 +173,7 @@ def set_column_as_primary_key(
173
173
Make an existing index column (integer type) as primary key with auto increment (identity).
174
174
175
175
This is used because pl.DataFrame.write_database() doesn't support writing index column as primary key.
176
+ Also, it will automatically set the start value of auto increment to the max value in the column.
176
177
177
178
Example:
178
179
>>> df = pl.DataFrame({"smiles": ["CCO", "CCN", "CCC"]}) # doctest: +SKIP
@@ -227,6 +228,46 @@ def set_column_as_primary_key(
227
228
)
228
229
229
230
231
def make_columns_primary_key(
    uri: str,
    *,
    schema_name: str = "public",
    table_name: str,
    column_names: str | Sequence[str],
):
    """
    Make one or more existing columns the primary key, without auto increment (identity).

    This is similar to make_columns_unique() but adds a PRIMARY KEY constraint
    instead of a UNIQUE constraint.

    Args:
        uri: PostgreSQL connection URI passed to psycopg.connect().
        schema_name: Schema containing the table. Defaults to "public".
        table_name: Name of the table to alter.
        column_names: A single column name, or a sequence of column names
            forming a (possibly composite) primary key.

    Note:
        Like the sibling helpers in this module, database errors are logged
        via logger.exception() and swallowed (best-effort behavior).
    """
    # Normalize before opening a connection so bad input fails fast and we
    # don't hold a connection while doing pure-Python work.
    if isinstance(column_names, str):
        column_names = [column_names]

    with psycopg.connect(
        conninfo=uri,
    ) as conn:
        try:
            # Cursor as a context manager so it is closed deterministically,
            # even if execute() raises.
            with conn.cursor() as cursor:
                cursor.execute(
                    # Identifiers are composed with psycopg.sql to avoid any
                    # string-built SQL; schema and table are quoted together.
                    sql.SQL("""
                        ALTER TABLE {table}
                        ADD PRIMARY KEY ({columns});
                    """).format(
                        table=sql.Identifier(schema_name, table_name),
                        columns=sql.SQL(",").join(
                            sql.Identifier(col) for col in column_names
                        ),
                    )
                )
            conn.commit()

        except psycopg.Error:
            logger.exception(
                f"Error setting primary key for column '{column_names}' in table '{table_name}'"
            )
230
271
def make_columns_unique (
231
272
uri : str ,
232
273
* ,
@@ -250,9 +291,9 @@ def make_columns_unique(
250
291
251
292
cursor .execute (
252
293
query = sql .SQL ("""
253
- ALTER TABLE {table}
254
- ADD CONSTRAINT {table_unique_constraint}
255
- UNIQUE ({columns});
294
+ ALTER TABLE {table}
295
+ ADD CONSTRAINT {table_unique_constraint}
296
+ UNIQUE ({columns});
256
297
""" ).format (
257
298
table = sql .Identifier (schema_name , table_name ),
258
299
table_unique_constraint = sql .Identifier (
@@ -267,7 +308,7 @@ def make_columns_unique(
267
308
268
309
except psycopg .Error :
269
310
logger .exception (
270
- f"Error setting primary key for column '{ column_names } ' in table '{ table_name } '"
311
+ f"Error setting unique constraint for column '{ column_names } ' in table '{ table_name } '"
271
312
)
272
313
273
314
@@ -479,7 +520,18 @@ def polars_write_database(
479
520
for col , dtype in columns_dtype .items ()
480
521
}
481
522
482
- df .to_pandas (use_pyarrow_extension_array = True ).to_sql (
523
+ pd_df = df .to_pandas (use_pyarrow_extension_array = True )
524
+
525
+ # If any column has type list[number] in Polars, the pandas DataFrame will have a numpy array.
526
+ # We need to convert it to a list, because `to_sql` doesn't support numpy arrays.
527
+ for col , dtype in columns_dtype .items ():
528
+ if isinstance (dtype , pl .List ):
529
+ if isinstance (dtype .inner , pl .Utf8 ):
530
+ continue
531
+ pd_df [col ] = pd_df [col ].apply (lambda x : x .tolist ())
532
+
533
+ # ic(pd_df)
534
+ pd_df .to_sql (
483
535
schema = schema_name ,
484
536
name = table_name ,
485
537
con = connection ,
0 commit comments