diff --git a/python/pyspark/pandas/data_type_ops/base.py b/python/pyspark/pandas/data_type_ops/base.py index 5a4cd7a1eb07..2df40252965b 100644 --- a/python/pyspark/pandas/data_type_ops/base.py +++ b/python/pyspark/pandas/data_type_ops/base.py @@ -150,7 +150,10 @@ def _as_bool_type(index_ops: IndexOpsLike, dtype: Dtype) -> IndexOpsLike: if isinstance(dtype, extension_dtypes): scol = index_ops.spark.column.cast(spark_type) else: - scol = F.when(index_ops.spark.column.isNull(), F.lit(False)).otherwise( + null_value = ( + F.lit(True) if isinstance(index_ops.spark.data_type, DecimalType) else F.lit(False) + ) + scol = F.when(index_ops.spark.column.isNull(), null_value).otherwise( index_ops.spark.column.cast(spark_type) ) return index_ops._with_new_scol( diff --git a/python/pyspark/pandas/tests/data_type_ops/test_as_type.py b/python/pyspark/pandas/tests/data_type_ops/test_as_type.py index 9d5c0d03d548..3fbfcf42f49f 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_as_type.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_as_type.py @@ -54,10 +54,7 @@ def test_astype(self): lambda: psser.astype(int_type), ) - # TODO(SPARK-37039): the np.nan series.astype(bool) should be True - if not pser.hasnans: - self.assert_eq(pser.astype(bool), psser.astype(bool)) - + self.assert_eq(pser.astype(bool), psser.astype(bool)) self.assert_eq(pser.astype(float), psser.astype(float)) self.assert_eq(pser.astype(np.float32), psser.astype(np.float32)) self.assert_eq(pser.astype(str), psser.astype(str))