From facd2afa6a9595a6cc16154a04be5b211b79fef6 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Wed, 8 Mar 2023 14:58:31 +0800 Subject: [PATCH 1/2] docstring --- python/pyspark/sql/pandas/map_ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/pandas/map_ops.py b/python/pyspark/sql/pandas/map_ops.py index 2184fdce52d4..eb8469223070 100644 --- a/python/pyspark/sql/pandas/map_ops.py +++ b/python/pyspark/sql/pandas/map_ops.py @@ -44,7 +44,8 @@ def mapInPandas( together as an iterator of `pandas.DataFrame`\\s to the function and the returned iterator of `pandas.DataFrame`\\s are combined as a :class:`DataFrame`. Each `pandas.DataFrame` size can be controlled by - `spark.sql.execution.arrow.maxRecordsPerBatch`. + `spark.sql.execution.arrow.maxRecordsPerBatch`. The size of the function's input and + output might be different. .. versionadded:: 3.0.0 @@ -108,7 +109,8 @@ def mapInArrow( together as an iterator of `pyarrow.RecordBatch`\\s to the function and the returned iterator of `pyarrow.RecordBatch`\\s are combined as a :class:`DataFrame`. Each `pyarrow.RecordBatch` size can be controlled by - `spark.sql.execution.arrow.maxRecordsPerBatch`. + `spark.sql.execution.arrow.maxRecordsPerBatch`. The size of the function's input and + output might be different. .. versionadded:: 3.3.0 From d74ebe5a51a0007f53d9254f0d24714b8fea39a2 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 8 Mar 2023 19:37:01 +0900 Subject: [PATCH 2/2] Apply suggestions from code review --- python/pyspark/sql/pandas/map_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/pandas/map_ops.py b/python/pyspark/sql/pandas/map_ops.py index eb8469223070..1370cad33b1d 100644 --- a/python/pyspark/sql/pandas/map_ops.py +++ b/python/pyspark/sql/pandas/map_ops.py @@ -45,7 +45,7 @@ def mapInPandas( returned iterator of `pandas.DataFrame`\\s are combined as a :class:`DataFrame`. Each `pandas.DataFrame` size can be controlled by `spark.sql.execution.arrow.maxRecordsPerBatch`. The size of the function's input and - output might be different. + output can be different. .. versionadded:: 3.0.0 @@ -110,7 +110,7 @@ def mapInArrow( returned iterator of `pyarrow.RecordBatch`\\s are combined as a :class:`DataFrame`. Each `pyarrow.RecordBatch` size can be controlled by `spark.sql.execution.arrow.maxRecordsPerBatch`. The size of the function's input and - output might be different. + output can be different. .. versionadded:: 3.3.0