diff --git a/python/pyspark/sql/pandas/map_ops.py b/python/pyspark/sql/pandas/map_ops.py index 2184fdce52d4..1370cad33b1d 100644 --- a/python/pyspark/sql/pandas/map_ops.py +++ b/python/pyspark/sql/pandas/map_ops.py @@ -44,7 +44,8 @@ def mapInPandas( together as an iterator of `pandas.DataFrame`\\s to the function and the returned iterator of `pandas.DataFrame`\\s are combined as a :class:`DataFrame`. Each `pandas.DataFrame` size can be controlled by - `spark.sql.execution.arrow.maxRecordsPerBatch`. + `spark.sql.execution.arrow.maxRecordsPerBatch`. The function's output does not + have to be the same size as its input. .. versionadded:: 3.0.0 @@ -108,7 +109,8 @@ def mapInArrow( together as an iterator of `pyarrow.RecordBatch`\\s to the function and the returned iterator of `pyarrow.RecordBatch`\\s are combined as a :class:`DataFrame`. Each `pyarrow.RecordBatch` size can be controlled by - `spark.sql.execution.arrow.maxRecordsPerBatch`. + `spark.sql.execution.arrow.maxRecordsPerBatch`. The function's output does not + have to be the same size as its input. .. versionadded:: 3.3.0