diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 49acf0476623e..c144b410bc384 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -4579,6 +4579,12 @@ def test_createDataFrame_with_float_index(self):
             self.spark.createDataFrame(
                 pd.DataFrame({'a': [1, 2, 3]}, index=[2., 3., 4.])).distinct().count(), 3)
 
+    def test_no_partition_toPandas(self):
+        # SPARK-32300: toPandas should work from a Spark DataFrame with no partitions
+        pdf = self.spark.sparkContext.emptyRDD().toDF("col1 int").toPandas()
+        self.assertEqual(len(pdf), 0)
+        self.assertEqual(list(pdf.columns), ["col1"])
+
 
 @unittest.skipIf(
     not _have_pandas or not _have_pyarrow,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index a755a6f5e0371..6e4577591dab3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -3290,7 +3290,7 @@ class Dataset[T] private[sql](
         val numPartitions = arrowBatchRdd.partitions.length
 
         // Store collection results for worst case of 1 to N-1 partitions
-        val results = new Array[Array[Array[Byte]]](numPartitions - 1)
+        val results = new Array[Array[Array[Byte]]](Math.max(0, numPartitions - 1))
         var lastIndex = -1  // index of last partition written
 
         // Handler to eagerly write partitions to Python in order
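
A minimal, standalone Scala sketch of the boundary condition the Dataset.scala hunk guards against (the object name PartitionBufferSketch and the bufferSize helper are illustrative only, not part of Spark): with zero partitions, numPartitions - 1 evaluates to -1, and allocating an array of negative size throws NegativeArraySizeException, so clamping with Math.max(0, ...) keeps the empty-DataFrame toPandas path valid.

object PartitionBufferSketch {
  // Mirrors the fixed allocation: out-of-order results are buffered for at
  // most numPartitions - 1 partitions, clamped to 0 for an empty RDD.
  def bufferSize(numPartitions: Int): Int = Math.max(0, numPartitions - 1)

  def main(args: Array[String]): Unit = {
    // 0 partitions reproduces the SPARK-32300 case; 1 and 4 are ordinary sizes.
    Seq(0, 1, 4).foreach { n =>
      val results = new Array[Array[Array[Byte]]](bufferSize(n))
      println(s"numPartitions=$n -> buffer length ${results.length}")
    }
  }
}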