
Commit 8b0f760

ueshin authored and HyukjinKwon committed
Fix passing options as keyword arguments (#968)
Resolves #967.
1 parent 979f4b8 commit 8b0f760
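
The fix in a nutshell: PySpark's reader and writer entry points collect data source options as **options keyword arguments (for example, DataFrameWriter.save(path=None, format=None, mode=None, partitionBy=None, **options)). Koalas was calling them with options=options, so the whole dict arrived as a single keyword argument literally named "options" and the user's settings were silently dropped. A minimal sketch of the mechanics, using a stand-in function rather than the real PySpark writer:

    # Stand-in mimicking the **options signature of DataFrameWriter.save().
    def save(path=None, format=None, mode=None, partitionBy=None, **options):
        return options  # the options the data source would actually see

    opts = {'lineSep': '___'}
    save(path='/tmp/foo', format='json', options=opts)  # {'options': {'lineSep': '___'}} - lost
    save(path='/tmp/foo', format='json', **opts)        # {'lineSep': '___'} - applied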

2 files changed: +42 -10


databricks/koalas/frame.py: +4 -4
@@ -3029,7 +3029,7 @@ def to_table(self, name: str, format: Optional[str] = None, mode: str = 'error',
         >>> df.to_table('%s.my_table' % db, partition_cols='date')
         """
         self.to_spark().write.saveAsTable(name=name, format=format, mode=mode,
-                                          partitionBy=partition_cols, options=options)
+                                          partitionBy=partition_cols, **options)
 
     def to_delta(self, path: str, mode: str = 'error',
                  partition_cols: Union[str, List[str], None] = None, **options):
@@ -3084,10 +3084,10 @@ def to_delta(self, path: str, mode: str = 'error',
         Overwrite an existing table's partitions, using the 'replaceWhere' capability in Delta:
 
         >>> df.to_delta('%s/to_delta/bar' % path,
-        ...             mode='overwrite', replaceWhere='date >= "2019-01-01"')
+        ...             mode='overwrite', replaceWhere='date >= "2012-01-01"')
         """
         self.to_spark_io(
-            path=path, mode=mode, format="delta", partition_cols=partition_cols, options=options)
+            path=path, mode=mode, format="delta", partition_cols=partition_cols, **options)
 
     def to_parquet(self, path: str, mode: str = 'error',
                    partition_cols: Union[str, List[str], None] = None,
@@ -3193,7 +3193,7 @@ def to_spark_io(self, path: Optional[str] = None, format: Optional[str] = None,
         >>> df.to_spark_io(path='%s/to_spark_io/foo.json' % path, format='json')
         """
         self.to_spark().write.save(
-            path=path, format=format, mode=mode, partitionBy=partition_cols, options=options)
+            path=path, format=format, mode=mode, partitionBy=partition_cols, **options)
 
     def to_spark(self, index_col: Optional[Union[str, List[str]]] = None):
         """

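After this change, writer keywords that are not named koalas parameters are forwarded to the underlying Spark data source. A minimal usage sketch (hypothetical path, requires a running Spark session):

    import databricks.koalas as ks

    df = ks.DataFrame({'id': [1, 2, 3]})
    # 'lineSep' is not a parameter of to_spark_io(); it reaches the JSON
    # data source only because **options is now forwarded to write.save().
    df.to_spark_io(path='/tmp/to_spark_io_demo.json', format='json',
                   mode='overwrite', lineSep='___')
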
databricks/koalas/namespace.py: +38 -6
@@ -307,8 +307,16 @@ def read_json(path: str, index_col: Optional[Union[str, List[str]]] = None, **op
       col 1 col 2
     0     a     b
     1     c     d
+
+    >>> df.to_json(path=r'%s/read_json/foo.json' % path, num_files=1, lineSep='___')
+    >>> ks.read_json(
+    ...     path=r'%s/read_json/foo.json' % path, lineSep='___'
+    ... ).sort_values(by="col 1")
+      col 1 col 2
+    0     a     b
+    1     c     d
     """
-    return read_spark_io(path, format='json', index_col=index_col, options=options)
+    return read_spark_io(path, format='json', index_col=index_col, **options)
 
 
 def read_delta(path: str, version: Optional[str] = None, timestamp: Optional[str] = None,
@@ -351,12 +359,25 @@ def read_delta(path: str, version: Optional[str] = None, timestamp: Optional[str
     >>> ks.read_delta('%s/read_delta/foo' % path)
        id
     0   0
+
+    >>> ks.range(10, 15, num_partitions=1).to_delta('%s/read_delta/foo' % path, mode='overwrite')
+    >>> ks.read_delta('%s/read_delta/foo' % path)
+       id
+    0  10
+    1  11
+    2  12
+    3  13
+    4  14
+
+    >>> ks.read_delta('%s/read_delta/foo' % path, version=0)
+       id
+    0   0
     """
     if version is not None:
         options['versionAsOf'] = version
     if timestamp is not None:
         options['timestampAsOf'] = timestamp
-    return read_spark_io(path, format='delta', index_col=index_col, options=options)
+    return read_spark_io(path, format='delta', index_col=index_col, **options)
 
 
 def read_table(name: str, index_col: Optional[Union[str, List[str]]] = None) -> DataFrame:
@@ -436,8 +457,19 @@ def read_spark_io(path: Optional[str] = None, format: Optional[str] = None,
     ...     '%s/read_spark_io/data.parquet' % path, format='parquet', schema='id long')
        id
     0   0
+
+    >>> ks.range(10, 15, num_partitions=1).to_spark_io('%s/read_spark_io/data.json' % path,
+    ...                                                format='json', lineSep='__')
+    >>> ks.read_spark_io(
+    ...     '%s/read_spark_io/data.json' % path, format='json', schema='id long', lineSep='__')
+       id
+    0  10
+    1  11
+    2  12
+    3  13
+    4  14
     """
-    sdf = default_session().read.load(path=path, format=format, schema=schema, options=options)
+    sdf = default_session().read.load(path=path, format=format, schema=schema, **options)
     index_map = _get_index_map(sdf, index_col)
 
     return DataFrame(_InternalFrame(sdf=sdf, index_map=index_map))
@@ -722,7 +754,7 @@ def read_excel(io, sheet_name=0, header=0, names=None, index_col=None, usecols=N
         na_values=na_values, keep_default_na=keep_default_na, verbose=verbose,
         parse_dates=parse_dates, date_parser=date_parser, thousands=thousands, comment=comment,
         skipfooter=skipfooter, convert_float=convert_float, mangle_dupe_cols=mangle_dupe_cols,
-        kwds=kwds)
+        **kwds)
     if isinstance(pdfs, dict):
         return OrderedDict([(key, from_pandas(value)) for key, value in pdfs.items()])
     else:
@@ -991,9 +1023,9 @@ def read_sql(sql, con, index_col=None, columns=None, **options):
     """
     striped = sql.strip()
     if ' ' not in striped:  # TODO: identify the table name or not more precisely.
-        return read_sql_table(sql, con, index_col=index_col, columns=columns, options=options)
+        return read_sql_table(sql, con, index_col=index_col, columns=columns, **options)
     else:
-        return read_sql_query(sql, con, index_col=index_col, options=options)
+        return read_sql_query(sql, con, index_col=index_col, **options)
 
 
 def to_datetime(arg, errors='raise', format=None, unit=None, infer_datetime_format=False,
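
The read side mirrors the write side: keywords that are not named parameters of the koalas readers now actually reach spark.read.load(). A hedged sketch along the lines of the new doctests above (hypothetical paths, requires Spark):

    import databricks.koalas as ks

    # 'lineSep' reaches the JSON source only via **options.
    json_df = ks.read_json('/tmp/to_spark_io_demo.json', lineSep='___')

    # read_delta folds version into options['versionAsOf'] before forwarding,
    # so Delta time travel also depends on the options dict being unpacked.
    delta_df = ks.read_delta('/tmp/read_delta_demo', version=0)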
