diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py
index cef0ea4f305df..7c17c4001f6db 100644
--- a/python/pyspark/sql/connect/client/core.py
+++ b/python/pyspark/sql/connect/client/core.py
@@ -932,7 +932,10 @@ def _proto_to_string(self, p: google.protobuf.message.Message) -> str:
         -------
         Single line string of the serialized proto message.
         """
-        return text_format.MessageToString(p, as_one_line=True)
+        try:
+            return text_format.MessageToString(p, as_one_line=True)
+        except RecursionError:
+            # SPARK-45852: deeply nested plans can exceed the interpreter's
+            # recursion limit during text serialization; degrade gracefully
+            # with a marker string instead of crashing the logging path.
+            return "<Truncated message due to recursion error>"
 
     def schema(self, plan: pb2.Plan) -> StructType:
         """
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index daf6772e52bf5..7a224d68219b0 100755
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -159,6 +159,19 @@ def spark_connect_clean_up_test_data(cls):
 
 
 class SparkConnectBasicTests(SparkConnectSQLTestCase):
+    def test_recursion_handling_for_plan_logging(self):
+        """SPARK-45852 - Test that we can handle recursion in plan logging."""
+        cdf = self.connect.range(1)
+        for x in range(400):
+            cdf = cdf.withColumn(f"col_{x}", CF.lit(x))
+
+        # Calling schema will trigger logging the message that will in turn trigger the message
+        # conversion into protobuf that will then trigger the recursion error.
+        self.assertIsNotNone(cdf.schema)
+
+        result = self.connect._client._proto_to_string(cdf._plan.to_proto(self.connect._client))
+        self.assertIn("recursion", result)
+
     def test_df_getattr_behavior(self):
         cdf = self.connect.range(10)
         sdf = self.spark.range(10)