@@ -1354,7 +1354,7 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, index=Tr
     # TODO: enable doctests once we drop Spark 2.3.x (due to type coercion logic
     # when creating arrays)
-    def transpose(self, limit: Optional[int] = 1000):
+    def transpose(self):
         """
         Transpose index and columns.
@@ -1365,23 +1365,17 @@ def transpose(self, limit: Optional[int] = 1000):
         .. note:: This method is based on an expensive operation due to the nature
             of big data. Internally it needs to generate each row for each value, and
             then group twice - it is a huge operation. To prevent misusage, this method
-            has the default limit of input length, 1000 and raises a ValueError.
+            limits the input length by the 'compute.max_rows' option and raises a ValueError.
 
+                >>> from databricks.koalas.config import get_option, set_option
+                >>> set_option('compute.max_rows', 1000)
                 >>> ks.DataFrame({'a': range(1001)}).transpose()  # doctest: +NORMALIZE_WHITESPACE
                 Traceback (most recent call last):
                   ...
                 ValueError: Current DataFrame has more than the given limit 1000 rows.
-                Please use df.transpose(limit=<maximum number of rows>) to retrieve more than
-                1000 rows. Note that, before changing the given 'limit', this operation is
-                considerably expensive.
-
-        Parameters
-        ----------
-        limit : int, optional
-            This parameter sets the limit of the current DataFrame. Set `None` to unlimit
-            the input length. When the limit is set, it is executed by the shortcut by collecting
-            the data into driver side, and then using pandas API. If the limit is unset,
-            the operation is executed by PySpark. Default is 1000.
+                Please set 'compute.max_rows' by using 'databricks.koalas.config.set_option'
+                to retrieve more than 1000 rows. Note that, before changing
+                'compute.max_rows', this operation is considerably expensive.
 
         Returns
         -------
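
The user-facing flow after this change, sketched as an illustrative session (not an exact doctest; `reset_option` is assumed to exist in `databricks.koalas.config` alongside the `get_option`/`set_option` used above):

    >>> import databricks.koalas as ks
    >>> from databricks.koalas.config import set_option, reset_option
    >>> set_option('compute.max_rows', 1000)   # the value the doctest above assumes
    >>> kdf = ks.DataFrame({'a': range(1001)})
    >>> kdf.transpose()
    Traceback (most recent call last):
      ...
    ValueError: Current DataFrame has more than the given limit 1000 rows. ...
    >>> set_option('compute.max_rows', 2000)   # raise the cap; 1001 rows now fit
    >>> len(kdf.transpose().columns)
    1001
    >>> reset_option('compute.max_rows')
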
@@ -1461,14 +1455,16 @@ def transpose(self, limit: Optional[int] = 1000):
         1    float64
         dtype: object
         """
-        if limit is not None:
-            pdf = self.head(limit + 1)._to_internal_pandas()
-            if len(pdf) > limit:
+        max_compute_count = get_option("compute.max_rows")
+        if max_compute_count is not None:
+            pdf = self.head(max_compute_count + 1)._to_internal_pandas()
+            if len(pdf) > max_compute_count:
                 raise ValueError(
-                    "Current DataFrame has more then the given limit %s rows. Please use "
-                    "df.transpose(limit=<maximum number of rows>) to retrieve more than %s rows. "
-                    "Note that, before changing the given 'limit', this operation is considerably "
-                    "expensive." % (limit, limit))
+                    "Current DataFrame has more than the given limit {0} rows. "
+                    "Please set 'compute.max_rows' by using 'databricks.koalas.config.set_option' "
+                    "to retrieve more than {0} rows. Note that, before changing "
+                    "'compute.max_rows', this operation is considerably expensive."
+                    .format(max_compute_count))
             return DataFrame(pdf.transpose())
 
         # Explode the data to be pairs.
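
The implementation above relies on a "fetch cap + 1" guard: pulling at most one row more than 'compute.max_rows' is enough to tell whether the input exceeds the cap, without counting the whole (possibly huge) dataset. A minimal, framework-agnostic sketch of the same pattern (`head_limited` is a hypothetical helper, not part of Koalas):

    import itertools

    def head_limited(rows, cap):
        # Hypothetical helper mirroring the transpose() guard: return up to
        # `cap` rows, raising if the iterable holds more. cap=None disables
        # the check, like an unset 'compute.max_rows'.
        if cap is None:
            return list(rows)
        sample = list(itertools.islice(rows, cap + 1))  # never pulls more than cap + 1
        if len(sample) > cap:
            raise ValueError(
                "Current input has more than the given limit %d rows." % cap)
        return sample

    head_limited(range(10), 1000)    # returns the 10 rows
    head_limited(range(1001), 1000)  # raises ValueError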