@@ -307,8 +307,16 @@ def read_json(path: str, index_col: Optional[Union[str, List[str]]] = None, **op
       col 1 col 2
     0     a     b
     1     c     d
+
+    >>> df.to_json(path=r'%s/read_json/foo.json' % path, num_files=1, lineSep='___')
+    >>> ks.read_json(
+    ...     path=r'%s/read_json/foo.json' % path, lineSep='___'
+    ... ).sort_values(by="col 1")
+      col 1 col 2
+    0     a     b
+    1     c     d
     """
-    return read_spark_io(path, format='json', index_col=index_col, options=options)
+    return read_spark_io(path, format='json', index_col=index_col, **options)


 def read_delta(path: str, version: Optional[str] = None, timestamp: Optional[str] = None,
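Note on the recurring fix in these hunks: the downstream readers expect each option as its own keyword argument, so passing the dict as a literal `options=options` keyword makes the whole dict arrive as a single argument named `options`, and settings such as `lineSep` are never applied. A minimal sketch of the difference (the `load` stub below is an illustrative stand-in, not the real Spark API):

    def load(path=None, format=None, **options):
        # Stand-in for a reader that collects per-option keyword arguments.
        return options

    opts = {'lineSep': '___'}
    load('foo.json', format='json', options=opts)  # {'options': {'lineSep': '___'}} -- nested, never applied
    load('foo.json', format='json', **opts)        # {'lineSep': '___'} -- each option applied individually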
@@ -351,12 +359,25 @@ def read_delta(path: str, version: Optional[str] = None, timestamp: Optional[str
     >>> ks.read_delta('%s/read_delta/foo' % path)
        id
     0   0
+
+    >>> ks.range(10, 15, num_partitions=1).to_delta('%s/read_delta/foo' % path, mode='overwrite')
+    >>> ks.read_delta('%s/read_delta/foo' % path)
+       id
+    0  10
+    1  11
+    2  12
+    3  13
+    4  14
+
+    >>> ks.read_delta('%s/read_delta/foo' % path, version=0)
+       id
+    0   0
     """
     if version is not None:
         options['versionAsOf'] = version
     if timestamp is not None:
         options['timestampAsOf'] = timestamp
-    return read_spark_io(path, format='delta', index_col=index_col, options=options)
+    return read_spark_io(path, format='delta', index_col=index_col, **options)


 def read_table(name: str, index_col: Optional[Union[str, List[str]]] = None) -> DataFrame:
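The new `version=0` doctest above exercises Delta Lake time travel: as the unchanged context lines show, `read_delta`'s convenience arguments are translated into the reader options Delta understands before being forwarded. A runnable sketch of just that mapping (the `delta_options` helper name is mine, for illustration only):

    from typing import Optional

    def delta_options(version: Optional[str] = None,
                      timestamp: Optional[str] = None) -> dict:
        # Mirrors the mapping in read_delta above: version/timestamp
        # become Delta's time-travel reader options.
        options = {}
        if version is not None:
            options['versionAsOf'] = version
        if timestamp is not None:
            options['timestampAsOf'] = timestamp
        return options

    delta_options(version='0')  # {'versionAsOf': '0'}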
@@ -436,8 +457,19 @@ def read_spark_io(path: Optional[str] = None, format: Optional[str] = None,
     ...     '%s/read_spark_io/data.parquet' % path, format='parquet', schema='id long')
        id
     0   0
+
+    >>> ks.range(10, 15, num_partitions=1).to_spark_io('%s/read_spark_io/data.json' % path,
+    ...                                                format='json', lineSep='__')
+    >>> ks.read_spark_io(
+    ...     '%s/read_spark_io/data.json' % path, format='json', schema='id long', lineSep='__')
+       id
+    0  10
+    1  11
+    2  12
+    3  13
+    4  14
     """
-    sdf = default_session().read.load(path=path, format=format, schema=schema, options=options)
+    sdf = default_session().read.load(path=path, format=format, schema=schema, **options)
     index_map = _get_index_map(sdf, index_col)

     return DataFrame(_InternalFrame(sdf=sdf, index_map=index_map))
@@ -722,7 +754,7 @@ def read_excel(io, sheet_name=0, header=0, names=None, index_col=None, usecols=N
         na_values=na_values, keep_default_na=keep_default_na, verbose=verbose,
         parse_dates=parse_dates, date_parser=date_parser, thousands=thousands, comment=comment,
         skipfooter=skipfooter, convert_float=convert_float, mangle_dupe_cols=mangle_dupe_cols,
-        kwds=kwds)
+        **kwds)
     if isinstance(pdfs, dict):
         return OrderedDict([(key, from_pandas(value)) for key, value in pdfs.items()])
     else:
@@ -991,9 +1023,9 @@ def read_sql(sql, con, index_col=None, columns=None, **options):
     """
     striped = sql.strip()
     if ' ' not in striped:  # TODO: identify the table name or not more precisely.
-        return read_sql_table(sql, con, index_col=index_col, columns=columns, options=options)
+        return read_sql_table(sql, con, index_col=index_col, columns=columns, **options)
     else:
-        return read_sql_query(sql, con, index_col=index_col, options=options)
+        return read_sql_query(sql, con, index_col=index_col, **options)


 def to_datetime(arg, errors='raise', format=None, unit=None, infer_datetime_format=False,
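For context, this hunk's unchanged lines show how `read_sql` routes its input: a string with no whitespace is treated as a table name, anything else as a query. A runnable sketch of that rule (the `dispatch` helper is mine, not part of koalas):

    def dispatch(sql: str) -> str:
        # Same test as the `if ' ' not in striped` branch above.
        return 'read_sql_table' if ' ' not in sql.strip() else 'read_sql_query'

    assert dispatch('my_table') == 'read_sql_table'
    assert dispatch('SELECT * FROM my_table') == 'read_sql_query'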