34 changes: 17 additions & 17 deletions python/pyspark/pipelines/spark_connect_graph_element_registry.py
@@ -47,9 +47,6 @@ def __init__(self, spark: SparkSession, dataflow_graph_id: str) -> None:
 
     def register_dataset(self, dataset: Dataset) -> None:
         if isinstance(dataset, Table):
-            table_properties = dataset.table_properties
-            partition_cols = dataset.partition_cols
-
             if isinstance(dataset.schema, str):
                 schema_string = dataset.schema
                 schema_data_type = None
@@ -60,7 +57,15 @@ def register_dataset(self, dataset: Dataset) -> None:
                 schema_string = None
                 schema_data_type = None
 
-            format = dataset.format
+            table_details = pb2.PipelineCommand.DefineDataset.TableDetails(
+                table_properties=dataset.table_properties,
+                partition_cols=dataset.partition_cols,
+                format=dataset.format,
+                # Even though schema_string is not required, the generated Python code seems to
+                # erroneously think it is required.
+                schema_string=schema_string,  # type: ignore[arg-type]
+                schema_data_type=schema_data_type,
+            )
 
             if isinstance(dataset, MaterializedView):
                 dataset_type = pb2.DatasetType.MATERIALIZED_VIEW
@@ -72,12 +77,8 @@ def register_dataset(self, dataset: Dataset) -> None:
                     messageParameters={"dataset_type": type(dataset).__name__},
                 )
         elif isinstance(dataset, TemporaryView):
-            table_properties = None
-            partition_cols = None
-            schema_string = None
-            schema_data_type = None
-            format = None
             dataset_type = pb2.DatasetType.TEMPORARY_VIEW
+            table_details = None
         else:
             raise PySparkTypeError(
                 errorClass="UNSUPPORTED_PIPELINES_DATASET_TYPE",
@@ -89,15 +90,10 @@ def register_dataset(self, dataset: Dataset) -> None:
             dataset_name=dataset.name,
             dataset_type=dataset_type,
             comment=dataset.comment,
-            table_properties=table_properties,
-            partition_cols=partition_cols,
-            format=format,
+            table_details=table_details,
             source_code_location=source_code_location_to_proto(dataset.source_code_location),
-            # Even though schema_string is not required, the generated Python code seems to
-            # erroneously think it is required.
-            schema_string=schema_string,  # type: ignore[arg-type]
-            schema_data_type=schema_data_type,
         )
 
         command = pb2.Command()
         command.pipeline_command.define_dataset.CopyFrom(inner_command)
         self._client.execute_command(command)
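
For context: the table-only fields (table_properties, partition_cols, format, and the schema fields) now travel inside a nested TableDetails message instead of as top-level DefineDataset fields. A minimal sketch of the new shape, assuming the pb2 alias this module already uses (pyspark.sql.connect.proto); the graph id, dataset names, and property values below are hypothetical:

import pyspark.sql.connect.proto as pb2  # same alias as in the module above

# Table-specific fields are grouped into one nested message.
table_details = pb2.PipelineCommand.DefineDataset.TableDetails(
    table_properties={"owner": "data-eng"},  # hypothetical property map
    partition_cols=["date"],                 # hypothetical partition column
    format="parquet",
)
define_table = pb2.PipelineCommand.DefineDataset(
    dataflow_graph_id="graph-1",  # hypothetical graph id
    dataset_name="events",
    dataset_type=pb2.DatasetType.MATERIALIZED_VIEW,
    table_details=table_details,
)

# A temporary view carries no table details, so the whole field stays
# unset instead of five separate fields being set to None.
define_view = pb2.PipelineCommand.DefineDataset(
    dataflow_graph_id="graph-1",
    dataset_name="events_tmp",
    dataset_type=pb2.DatasetType.TEMPORARY_VIEW,
)

Grouping the fields this way is what lets the TemporaryView branch above collapse to a single table_details = None, and any future table-only field lands in one place.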
@@ -107,11 +103,15 @@ def register_flow(self, flow: Flow) -> None:
         df = flow.func()
         relation = cast(ConnectDataFrame, df)._plan.plan(self._client)
 
+        relation_flow_details = pb2.PipelineCommand.DefineFlow.WriteRelationFlowDetails(
+            relation=relation,
+        )
+
         inner_command = pb2.PipelineCommand.DefineFlow(
             dataflow_graph_id=self._dataflow_graph_id,
             flow_name=flow.name,
             target_dataset_name=flow.target,
-            relation=relation,
+            relation_flow_details=relation_flow_details,
             sql_conf=flow.spark_conf,
             source_code_location=source_code_location_to_proto(flow.source_code_location),
         )
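
The flow side gets the same treatment: the relation is wrapped in a nested WriteRelationFlowDetails message rather than being set directly on DefineFlow. A sketch under the same assumptions, with pb2.Relation() standing in for the plan that register_flow derives from the flow's DataFrame (ids and names are hypothetical):

import pyspark.sql.connect.proto as pb2

relation = pb2.Relation()  # placeholder; the real code builds this from the flow's plan

# The relation now lives in a nested flow-details message.
relation_flow_details = pb2.PipelineCommand.DefineFlow.WriteRelationFlowDetails(
    relation=relation,
)
define_flow = pb2.PipelineCommand.DefineFlow(
    dataflow_graph_id="graph-1",  # hypothetical graph id
    flow_name="events_flow",      # hypothetical flow name
    target_dataset_name="events",
    relation_flow_details=relation_flow_details,
)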
107 changes: 60 additions & 47 deletions python/pyspark/sql/connect/proto/pipelines_pb2.py

Large diffs are not rendered by default.
