diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala index 588e62768acc..74dd58f49032 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2741,8 +2741,23 @@ class Dataset[T] private[sql] ( throw new UnsupportedOperationException("localCheckpoint is not implemented.") } + /** + * Returns `true` when the logical query plans inside both [[Dataset]]s are equal and therefore + * return the same results. + * + * @note + * The equality comparison here is simplified by tolerating cosmetic differences such as + * attribute names. + * @note + * This API can still return `false` for two [[Dataset]]s that return the same results, for + * instance, when they come from different plans. Such false-negative semantics can be + * useful, for example, for caching. This comparison may not be fast because it executes + * an RPC call. + * @since 3.4.0 + */ + @DeveloperApi def sameSemantics(other: Dataset[T]): Boolean = { - throw new UnsupportedOperationException("sameSemantics is not implemented.") + sparkSession.sameSemantics(this.plan, other.plan) } def semanticHash(): Int = { diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala index 31a63720c5c2..85f576ec5155 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -400,6 +400,10 @@ class SparkSession private[sql] ( client.analyze(method, Some(plan), explainMode) } + private[sql] def sameSemantics(plan: proto.Plan, otherPlan: proto.Plan): Boolean = { + client.sameSemantics(plan, otherPlan).getSameSemantics.getResult + } + private[sql] def execute[T](plan: proto.Plan, encoder: AgnosticEncoder[T]): SparkResult[T] = { val value = client.execute(plan) val result = new SparkResult(value, allocator, encoder) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala index 8828a4a87e69..05aa191a4ddf 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala @@ -141,6 +141,20 @@ private[sql] class SparkConnectClient( builder.setSparkVersion(proto.AnalyzePlanRequest.SparkVersion.newBuilder().build()) case other => throw new IllegalArgumentException(s"Unknown Analyze request $other") } + analyze(builder) + } + + def sameSemantics(plan: proto.Plan, otherPlan: proto.Plan): proto.AnalyzePlanResponse = { + val builder = proto.AnalyzePlanRequest.newBuilder() + builder.setSameSemantics( + proto.AnalyzePlanRequest.SameSemantics + .newBuilder() + .setTargetPlan(plan) + .setOtherPlan(otherPlan)) + analyze(builder) + } + + private def analyze(builder: proto.AnalyzePlanRequest.Builder): proto.AnalyzePlanResponse = { val request = builder .setUserContext(userContext) .setClientId(sessionId) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala index 089645a2d8d7..11e28f538e89 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala @@ -628,6 +628,12 @@ class ClientE2ETestSuite extends RemoteSparkSession { val result = spark.createDataFrame(data.asJava, schema).collect() assert(result === data) } + + test("SameSemantics") { + val plan = spark.sql("select 1") + val otherPlan = spark.sql("select 1") + assert(plan.sameSemantics(otherPlan)) + } } private[sql] case class MyType(id: Long, a: Double, b: Double) diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index 09407a99119c..2252d91c9ff7 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -79,6 +79,7 @@ message AnalyzePlanRequest { InputFiles input_files = 9; SparkVersion spark_version = 10; DDLParse ddl_parse = 11; + SameSemantics same_semantics = 12; } message Schema { @@ -145,6 +146,16 @@ message AnalyzePlanRequest { // (Required) The DDL formatted string to be parsed. string ddl_string = 1; } + + + // Returns `true` when the logical query plans are equal and therefore return the same results. + message SameSemantics { + // (Required) The plan to be compared. + Plan target_plan = 1; + + // (Required) The other plan to be compared. + Plan other_plan = 2; + } } // Response to performing analysis of the query. Contains relevant metadata to be able to @@ -161,6 +172,7 @@ message AnalyzePlanResponse { InputFiles input_files = 7; SparkVersion spark_version = 8; DDLParse ddl_parse = 9; + SameSemantics same_semantics = 10; } message Schema { @@ -195,6 +207,10 @@ message AnalyzePlanResponse { message DDLParse { DataType parsed = 1; } + + message SameSemantics { + bool result = 1; + } } // A request to be executed by the service.
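As a quick illustration of the request and response shapes defined above, here is a minimal Python sketch (not part of the patch) that populates the new `same_semantics` arm of the analyze oneof through the regenerated `base_pb2` bindings; the empty `Plan` messages are placeholders rather than real relation trees:

    import pyspark.sql.connect.proto.base_pb2 as pb2

    # Writing into same_semantics selects that arm of the request's
    # "analyze" oneof; CopyFrom marks the sub-messages as present.
    request = pb2.AnalyzePlanRequest()
    request.same_semantics.target_plan.CopyFrom(pb2.Plan())
    request.same_semantics.other_plan.CopyFrom(pb2.Plan())
    assert request.WhichOneof("analyze") == "same_semantics"

    # The server answers through the matching arm of the response's
    # "result" oneof, carrying a single boolean.
    response = pb2.AnalyzePlanResponse()
    response.same_semantics.result = True
    assert response.WhichOneof("result") == "same_semantics"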
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAnalyzeHandler.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAnalyzeHandler.scala index f6adcd852e81..e3d4da66a087 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAnalyzeHandler.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAnalyzeHandler.scala @@ -140,6 +140,18 @@ private[connect] class SparkConnectAnalyzeHandler( .setParsed(DataTypeProtoConverter.toConnectProtoType(schema)) .build()) + case proto.AnalyzePlanRequest.AnalyzeCase.SAME_SEMANTICS => + val target = Dataset.ofRows( + session, + planner.transformRelation(request.getSameSemantics.getTargetPlan.getRoot)) + val other = Dataset.ofRows( + session, + planner.transformRelation(request.getSameSemantics.getOtherPlan.getRoot)) + builder.setSameSemantics( + proto.AnalyzePlanResponse.SameSemantics + .newBuilder() + .setResult(target.sameSemantics(other))) + case other => throw InvalidPlanInput(s"Unknown Analyze Method $other!") } diff --git a/python/pyspark/sql/connect/client.py b/python/pyspark/sql/connect/client.py index 8be0c9975381..2594640aa3e2 100644 --- a/python/pyspark/sql/connect/client.py +++ b/python/pyspark/sql/connect/client.py @@ -401,6 +401,7 @@ def __init__( input_files: Optional[List[str]], spark_version: Optional[str], parsed: Optional[pb2.DataType], + is_same_semantics: Optional[bool], ): self.schema = schema self.explain_string = explain_string @@ -410,6 +411,7 @@ def __init__( self.input_files = input_files self.spark_version = spark_version self.parsed = parsed + self.is_same_semantics = is_same_semantics @classmethod def fromProto(cls, pb: Any) -> "AnalyzeResult": @@ -421,6 +423,7 @@ def fromProto(cls, pb: Any) -> "AnalyzeResult": input_files: Optional[List[str]] = None spark_version: Optional[str] = None parsed: Optional[pb2.DataType] = None + is_same_semantics: Optional[bool] = None if pb.HasField("schema"): schema = pb.schema.schema @@ -438,6 +441,8 @@ def fromProto(cls, pb: Any) -> "AnalyzeResult": spark_version = pb.spark_version.version elif pb.HasField("ddl_parse"): parsed = pb.ddl_parse.parsed + elif pb.HasField("same_semantics"): + is_same_semantics = pb.same_semantics.result else: raise SparkConnectException("No analyze result found!") @@ -450,6 +455,7 @@ def fromProto(cls, pb: Any) -> "AnalyzeResult": input_files, spark_version, parsed, + is_same_semantics, ) @@ -690,6 +696,14 @@ def execute_command( else: return (None, properties) + def same_semantics(self, plan: pb2.Plan, other: pb2.Plan) -> bool: + """ + Return whether the two plans have the same semantics. + """ + result = self._analyze(method="same_semantics", plan=plan, other=other).is_same_semantics + assert result is not None + return result + def close(self) -> None: """ Close the channel.
@@ -765,6 +779,9 @@ def _analyze(self, method: str, **kwargs: Any) -> AnalyzeResult: req.spark_version.SetInParent() elif method == "ddl_parse": req.ddl_parse.ddl_string = cast(str, kwargs.get("ddl_string")) + elif method == "same_semantics": + req.same_semantics.target_plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan"))) + req.same_semantics.other_plan.CopyFrom(cast(pb2.Plan, kwargs.get("other"))) else: raise ValueError(f"Unknown Analyze method: {method}") diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index fdc71d466327..38e245f0335f 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -1610,8 +1610,15 @@ def _repr_html_(self, *args: Any, **kwargs: Any) -> None: def semanticHash(self, *args: Any, **kwargs: Any) -> None: raise NotImplementedError("semanticHash() is not implemented.") - def sameSemantics(self, *args: Any, **kwargs: Any) -> None: - raise NotImplementedError("sameSemantics() is not implemented.") + def sameSemantics(self, other: "DataFrame") -> bool: + assert self._plan is not None + assert other._plan is not None + return self._session.client.same_semantics( + plan=self._plan.to_proto(self._session.client), + other=other._plan.to_proto(other._session.client), + ) + + sameSemantics.__doc__ = PySparkDataFrame.sameSemantics.__doc__ def writeTo(self, table: str) -> "DataFrameWriterV2": assert self._plan is not None diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py index 9ece82ed5357..6d41ce28c7c3 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.py +++ b/python/pyspark/sql/connect/proto/base_pb2.py @@ -37,7 +37,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xa2\x0b\n\x12\x41nalyzePlanRequest\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 
\x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1a\x35\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlStringB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\xae\x08\n\x13\x41nalyzePlanResponse\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsedB\x08\n\x06result"\xcf\x01\n\x12\x45xecutePlanRequest\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"\xc8\t\n\x13\x45xecutePlanResponse\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 
\x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1a=\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1a`\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06valuesB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x82\x08\n\rConfigRequest\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"x\n\x0e\x43onfigResponse\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xaf\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12=\n\x0cuser_context\x18\x02 
\x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payload"\xbc\x01\n\x14\x41\x64\x64\x41rtifactsResponse\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful2\xed\x02\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x42"\n\x1eorg.apache.spark.connect.protoP\x01\x62\x06proto3' + b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf7\x0c\n\x12\x41nalyzePlanRequest\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n 
\x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1a\x35\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlanB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\xb2\t\n\x13\x41nalyzePlanResponse\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06resultB\x08\n\x06result"\xcf\x01\n\x12\x45xecutePlanRequest\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12=\n\x0cuser_context\x18\x02 
\x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"\xc8\t\n\x13\x45xecutePlanResponse\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1a=\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1a`\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06valuesB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x82\x08\n\rConfigRequest\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 
\x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"x\n\x0e\x43onfigResponse\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xaf\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1b\n\tclient_id\x18\x01 \x01(\tR\x08\x63lientId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payload"\xbc\x01\n\x14\x41\x64\x64\x41rtifactsResponse\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful2\xed\x02\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x42"\n\x1eorg.apache.spark.connect.protoP\x01\x62\x06proto3' ) @@ -52,6 +52,7 @@ _ANALYZEPLANREQUEST_INPUTFILES = _ANALYZEPLANREQUEST.nested_types_by_name["InputFiles"] _ANALYZEPLANREQUEST_SPARKVERSION = _ANALYZEPLANREQUEST.nested_types_by_name["SparkVersion"] _ANALYZEPLANREQUEST_DDLPARSE = _ANALYZEPLANREQUEST.nested_types_by_name["DDLParse"] +_ANALYZEPLANREQUEST_SAMESEMANTICS = _ANALYZEPLANREQUEST.nested_types_by_name["SameSemantics"] _ANALYZEPLANRESPONSE = DESCRIPTOR.message_types_by_name["AnalyzePlanResponse"] _ANALYZEPLANRESPONSE_SCHEMA = _ANALYZEPLANRESPONSE.nested_types_by_name["Schema"] _ANALYZEPLANRESPONSE_EXPLAIN = _ANALYZEPLANRESPONSE.nested_types_by_name["Explain"] @@ -61,6 +62,7 @@ _ANALYZEPLANRESPONSE_INPUTFILES = _ANALYZEPLANRESPONSE.nested_types_by_name["InputFiles"] _ANALYZEPLANRESPONSE_SPARKVERSION = _ANALYZEPLANRESPONSE.nested_types_by_name["SparkVersion"] 
_ANALYZEPLANRESPONSE_DDLPARSE = _ANALYZEPLANRESPONSE.nested_types_by_name["DDLParse"] +_ANALYZEPLANRESPONSE_SAMESEMANTICS = _ANALYZEPLANRESPONSE.nested_types_by_name["SameSemantics"] _EXECUTEPLANREQUEST = DESCRIPTOR.message_types_by_name["ExecutePlanRequest"] _EXECUTEPLANRESPONSE = DESCRIPTOR.message_types_by_name["ExecutePlanResponse"] _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT = _EXECUTEPLANRESPONSE.nested_types_by_name[ @@ -203,6 +205,15 @@ # @@protoc_insertion_point(class_scope:spark.connect.AnalyzePlanRequest.DDLParse) }, ), + "SameSemantics": _reflection.GeneratedProtocolMessageType( + "SameSemantics", + (_message.Message,), + { + "DESCRIPTOR": _ANALYZEPLANREQUEST_SAMESEMANTICS, + "__module__": "spark.connect.base_pb2" + # @@protoc_insertion_point(class_scope:spark.connect.AnalyzePlanRequest.SameSemantics) + }, + ), "DESCRIPTOR": _ANALYZEPLANREQUEST, "__module__": "spark.connect.base_pb2" # @@protoc_insertion_point(class_scope:spark.connect.AnalyzePlanRequest) @@ -217,6 +228,7 @@ _sym_db.RegisterMessage(AnalyzePlanRequest.InputFiles) _sym_db.RegisterMessage(AnalyzePlanRequest.SparkVersion) _sym_db.RegisterMessage(AnalyzePlanRequest.DDLParse) +_sym_db.RegisterMessage(AnalyzePlanRequest.SameSemantics) AnalyzePlanResponse = _reflection.GeneratedProtocolMessageType( "AnalyzePlanResponse", @@ -294,6 +306,15 @@ # @@protoc_insertion_point(class_scope:spark.connect.AnalyzePlanResponse.DDLParse) }, ), + "SameSemantics": _reflection.GeneratedProtocolMessageType( + "SameSemantics", + (_message.Message,), + { + "DESCRIPTOR": _ANALYZEPLANRESPONSE_SAMESEMANTICS, + "__module__": "spark.connect.base_pb2" + # @@protoc_insertion_point(class_scope:spark.connect.AnalyzePlanResponse.SameSemantics) + }, + ), "DESCRIPTOR": _ANALYZEPLANRESPONSE, "__module__": "spark.connect.base_pb2" # @@protoc_insertion_point(class_scope:spark.connect.AnalyzePlanResponse) @@ -308,6 +329,7 @@ _sym_db.RegisterMessage(AnalyzePlanResponse.InputFiles) _sym_db.RegisterMessage(AnalyzePlanResponse.SparkVersion) _sym_db.RegisterMessage(AnalyzePlanResponse.DDLParse) +_sym_db.RegisterMessage(AnalyzePlanResponse.SameSemantics) ExecutePlanRequest = _reflection.GeneratedProtocolMessageType( "ExecutePlanRequest", @@ -598,97 +620,101 @@ _USERCONTEXT._serialized_start = 309 _USERCONTEXT._serialized_end = 431 _ANALYZEPLANREQUEST._serialized_start = 434 - _ANALYZEPLANREQUEST._serialized_end = 1876 - _ANALYZEPLANREQUEST_SCHEMA._serialized_start = 1205 - _ANALYZEPLANREQUEST_SCHEMA._serialized_end = 1254 - _ANALYZEPLANREQUEST_EXPLAIN._serialized_start = 1257 - _ANALYZEPLANREQUEST_EXPLAIN._serialized_end = 1572 - _ANALYZEPLANREQUEST_EXPLAIN_EXPLAINMODE._serialized_start = 1400 - _ANALYZEPLANREQUEST_EXPLAIN_EXPLAINMODE._serialized_end = 1572 - _ANALYZEPLANREQUEST_TREESTRING._serialized_start = 1574 - _ANALYZEPLANREQUEST_TREESTRING._serialized_end = 1627 - _ANALYZEPLANREQUEST_ISLOCAL._serialized_start = 1629 - _ANALYZEPLANREQUEST_ISLOCAL._serialized_end = 1679 - _ANALYZEPLANREQUEST_ISSTREAMING._serialized_start = 1681 - _ANALYZEPLANREQUEST_ISSTREAMING._serialized_end = 1735 - _ANALYZEPLANREQUEST_INPUTFILES._serialized_start = 1737 - _ANALYZEPLANREQUEST_INPUTFILES._serialized_end = 1790 - _ANALYZEPLANREQUEST_SPARKVERSION._serialized_start = 1792 - _ANALYZEPLANREQUEST_SPARKVERSION._serialized_end = 1806 - _ANALYZEPLANREQUEST_DDLPARSE._serialized_start = 1808 - _ANALYZEPLANREQUEST_DDLPARSE._serialized_end = 1849 - _ANALYZEPLANRESPONSE._serialized_start = 1879 - _ANALYZEPLANRESPONSE._serialized_end = 2949 - _ANALYZEPLANRESPONSE_SCHEMA._serialized_start = 
2558 - _ANALYZEPLANRESPONSE_SCHEMA._serialized_end = 2615 - _ANALYZEPLANRESPONSE_EXPLAIN._serialized_start = 2617 - _ANALYZEPLANRESPONSE_EXPLAIN._serialized_end = 2665 - _ANALYZEPLANRESPONSE_TREESTRING._serialized_start = 2667 - _ANALYZEPLANRESPONSE_TREESTRING._serialized_end = 2712 - _ANALYZEPLANRESPONSE_ISLOCAL._serialized_start = 2714 - _ANALYZEPLANRESPONSE_ISLOCAL._serialized_end = 2750 - _ANALYZEPLANRESPONSE_ISSTREAMING._serialized_start = 2752 - _ANALYZEPLANRESPONSE_ISSTREAMING._serialized_end = 2800 - _ANALYZEPLANRESPONSE_INPUTFILES._serialized_start = 2802 - _ANALYZEPLANRESPONSE_INPUTFILES._serialized_end = 2836 - _ANALYZEPLANRESPONSE_SPARKVERSION._serialized_start = 2838 - _ANALYZEPLANRESPONSE_SPARKVERSION._serialized_end = 2878 - _ANALYZEPLANRESPONSE_DDLPARSE._serialized_start = 2880 - _ANALYZEPLANRESPONSE_DDLPARSE._serialized_end = 2939 - _EXECUTEPLANREQUEST._serialized_start = 2952 - _EXECUTEPLANREQUEST._serialized_end = 3159 - _EXECUTEPLANRESPONSE._serialized_start = 3162 - _EXECUTEPLANRESPONSE._serialized_end = 4386 - _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 3617 - _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 3688 - _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 3690 - _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_end = 3751 - _EXECUTEPLANRESPONSE_METRICS._serialized_start = 3754 - _EXECUTEPLANRESPONSE_METRICS._serialized_end = 4271 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_start = 3849 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_end = 4181 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_start = 4058 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_end = 4181 - _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_start = 4183 - _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_end = 4271 - _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_start = 4273 - _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_end = 4369 - _KEYVALUE._serialized_start = 4388 - _KEYVALUE._serialized_end = 4453 - _CONFIGREQUEST._serialized_start = 4456 - _CONFIGREQUEST._serialized_end = 5482 - _CONFIGREQUEST_OPERATION._serialized_start = 4674 - _CONFIGREQUEST_OPERATION._serialized_end = 5172 - _CONFIGREQUEST_SET._serialized_start = 5174 - _CONFIGREQUEST_SET._serialized_end = 5226 - _CONFIGREQUEST_GET._serialized_start = 5228 - _CONFIGREQUEST_GET._serialized_end = 5253 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 5255 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 5318 - _CONFIGREQUEST_GETOPTION._serialized_start = 5320 - _CONFIGREQUEST_GETOPTION._serialized_end = 5351 - _CONFIGREQUEST_GETALL._serialized_start = 5353 - _CONFIGREQUEST_GETALL._serialized_end = 5401 - _CONFIGREQUEST_UNSET._serialized_start = 5403 - _CONFIGREQUEST_UNSET._serialized_end = 5430 - _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 5432 - _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 5466 - _CONFIGRESPONSE._serialized_start = 5484 - _CONFIGRESPONSE._serialized_end = 5604 - _ADDARTIFACTSREQUEST._serialized_start = 5607 - _ADDARTIFACTSREQUEST._serialized_end = 6422 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 5954 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 6007 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 6009 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 6120 - _ADDARTIFACTSREQUEST_BATCH._serialized_start = 6122 - _ADDARTIFACTSREQUEST_BATCH._serialized_end = 6215 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 6218 - 
_ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 6411 - _ADDARTIFACTSRESPONSE._serialized_start = 6425 - _ADDARTIFACTSRESPONSE._serialized_end = 6613 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 6532 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 6613 - _SPARKCONNECTSERVICE._serialized_start = 6616 - _SPARKCONNECTSERVICE._serialized_end = 6981 + _ANALYZEPLANREQUEST._serialized_end = 2089 + _ANALYZEPLANREQUEST_SCHEMA._serialized_start = 1295 + _ANALYZEPLANREQUEST_SCHEMA._serialized_end = 1344 + _ANALYZEPLANREQUEST_EXPLAIN._serialized_start = 1347 + _ANALYZEPLANREQUEST_EXPLAIN._serialized_end = 1662 + _ANALYZEPLANREQUEST_EXPLAIN_EXPLAINMODE._serialized_start = 1490 + _ANALYZEPLANREQUEST_EXPLAIN_EXPLAINMODE._serialized_end = 1662 + _ANALYZEPLANREQUEST_TREESTRING._serialized_start = 1664 + _ANALYZEPLANREQUEST_TREESTRING._serialized_end = 1717 + _ANALYZEPLANREQUEST_ISLOCAL._serialized_start = 1719 + _ANALYZEPLANREQUEST_ISLOCAL._serialized_end = 1769 + _ANALYZEPLANREQUEST_ISSTREAMING._serialized_start = 1771 + _ANALYZEPLANREQUEST_ISSTREAMING._serialized_end = 1825 + _ANALYZEPLANREQUEST_INPUTFILES._serialized_start = 1827 + _ANALYZEPLANREQUEST_INPUTFILES._serialized_end = 1880 + _ANALYZEPLANREQUEST_SPARKVERSION._serialized_start = 1882 + _ANALYZEPLANREQUEST_SPARKVERSION._serialized_end = 1896 + _ANALYZEPLANREQUEST_DDLPARSE._serialized_start = 1898 + _ANALYZEPLANREQUEST_DDLPARSE._serialized_end = 1939 + _ANALYZEPLANREQUEST_SAMESEMANTICS._serialized_start = 1941 + _ANALYZEPLANREQUEST_SAMESEMANTICS._serialized_end = 2062 + _ANALYZEPLANRESPONSE._serialized_start = 2092 + _ANALYZEPLANRESPONSE._serialized_end = 3294 + _ANALYZEPLANRESPONSE_SCHEMA._serialized_start = 2862 + _ANALYZEPLANRESPONSE_SCHEMA._serialized_end = 2919 + _ANALYZEPLANRESPONSE_EXPLAIN._serialized_start = 2921 + _ANALYZEPLANRESPONSE_EXPLAIN._serialized_end = 2969 + _ANALYZEPLANRESPONSE_TREESTRING._serialized_start = 2971 + _ANALYZEPLANRESPONSE_TREESTRING._serialized_end = 3016 + _ANALYZEPLANRESPONSE_ISLOCAL._serialized_start = 3018 + _ANALYZEPLANRESPONSE_ISLOCAL._serialized_end = 3054 + _ANALYZEPLANRESPONSE_ISSTREAMING._serialized_start = 3056 + _ANALYZEPLANRESPONSE_ISSTREAMING._serialized_end = 3104 + _ANALYZEPLANRESPONSE_INPUTFILES._serialized_start = 3106 + _ANALYZEPLANRESPONSE_INPUTFILES._serialized_end = 3140 + _ANALYZEPLANRESPONSE_SPARKVERSION._serialized_start = 3142 + _ANALYZEPLANRESPONSE_SPARKVERSION._serialized_end = 3182 + _ANALYZEPLANRESPONSE_DDLPARSE._serialized_start = 3184 + _ANALYZEPLANRESPONSE_DDLPARSE._serialized_end = 3243 + _ANALYZEPLANRESPONSE_SAMESEMANTICS._serialized_start = 3245 + _ANALYZEPLANRESPONSE_SAMESEMANTICS._serialized_end = 3284 + _EXECUTEPLANREQUEST._serialized_start = 3297 + _EXECUTEPLANREQUEST._serialized_end = 3504 + _EXECUTEPLANRESPONSE._serialized_start = 3507 + _EXECUTEPLANRESPONSE._serialized_end = 4731 + _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 3962 + _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 4033 + _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 4035 + _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_end = 4096 + _EXECUTEPLANRESPONSE_METRICS._serialized_start = 4099 + _EXECUTEPLANRESPONSE_METRICS._serialized_end = 4616 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_start = 4194 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_end = 4526 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_start = 4403 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_end = 
4526 + _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_start = 4528 + _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_end = 4616 + _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_start = 4618 + _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_end = 4714 + _KEYVALUE._serialized_start = 4733 + _KEYVALUE._serialized_end = 4798 + _CONFIGREQUEST._serialized_start = 4801 + _CONFIGREQUEST._serialized_end = 5827 + _CONFIGREQUEST_OPERATION._serialized_start = 5019 + _CONFIGREQUEST_OPERATION._serialized_end = 5517 + _CONFIGREQUEST_SET._serialized_start = 5519 + _CONFIGREQUEST_SET._serialized_end = 5571 + _CONFIGREQUEST_GET._serialized_start = 5573 + _CONFIGREQUEST_GET._serialized_end = 5598 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 5600 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 5663 + _CONFIGREQUEST_GETOPTION._serialized_start = 5665 + _CONFIGREQUEST_GETOPTION._serialized_end = 5696 + _CONFIGREQUEST_GETALL._serialized_start = 5698 + _CONFIGREQUEST_GETALL._serialized_end = 5746 + _CONFIGREQUEST_UNSET._serialized_start = 5748 + _CONFIGREQUEST_UNSET._serialized_end = 5775 + _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 5777 + _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 5811 + _CONFIGRESPONSE._serialized_start = 5829 + _CONFIGRESPONSE._serialized_end = 5949 + _ADDARTIFACTSREQUEST._serialized_start = 5952 + _ADDARTIFACTSREQUEST._serialized_end = 6767 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 6299 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 6352 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 6354 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 6465 + _ADDARTIFACTSREQUEST_BATCH._serialized_start = 6467 + _ADDARTIFACTSREQUEST_BATCH._serialized_end = 6560 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 6563 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 6756 + _ADDARTIFACTSRESPONSE._serialized_start = 6770 + _ADDARTIFACTSRESPONSE._serialized_end = 6958 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 6877 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 6958 + _SPARKCONNECTSERVICE._serialized_start = 6961 + _SPARKCONNECTSERVICE._serialized_end = 7326 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index 56e1d2e416fa..2e9a877b658b 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -318,6 +318,38 @@ class AnalyzePlanRequest(google.protobuf.message.Message): self, field_name: typing_extensions.Literal["ddl_string", b"ddl_string"] ) -> None: ... + class SameSemantics(google.protobuf.message.Message): + """Returns `true` when the logical query plans are equal and therefore return the same results.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + TARGET_PLAN_FIELD_NUMBER: builtins.int + OTHER_PLAN_FIELD_NUMBER: builtins.int + @property + def target_plan(self) -> global___Plan: + """(Required) The plan to be compared.""" + @property + def other_plan(self) -> global___Plan: + """(Required) The other plan to be compared.""" + def __init__( + self, + *, + target_plan: global___Plan | None = ..., + other_plan: global___Plan | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "other_plan", b"other_plan", "target_plan", b"target_plan" + ], + ) -> builtins.bool: ...
+ def ClearField( + self, + field_name: typing_extensions.Literal[ + "other_plan", b"other_plan", "target_plan", b"target_plan" + ], + ) -> None: ... + CLIENT_ID_FIELD_NUMBER: builtins.int USER_CONTEXT_FIELD_NUMBER: builtins.int CLIENT_TYPE_FIELD_NUMBER: builtins.int @@ -329,6 +361,7 @@ class AnalyzePlanRequest(google.protobuf.message.Message): INPUT_FILES_FIELD_NUMBER: builtins.int SPARK_VERSION_FIELD_NUMBER: builtins.int DDL_PARSE_FIELD_NUMBER: builtins.int + SAME_SEMANTICS_FIELD_NUMBER: builtins.int client_id: builtins.str """(Required) @@ -359,6 +392,8 @@ class AnalyzePlanRequest(google.protobuf.message.Message): def spark_version(self) -> global___AnalyzePlanRequest.SparkVersion: ... @property def ddl_parse(self) -> global___AnalyzePlanRequest.DDLParse: ... + @property + def same_semantics(self) -> global___AnalyzePlanRequest.SameSemantics: ... def __init__( self, *, @@ -373,6 +408,7 @@ class AnalyzePlanRequest(google.protobuf.message.Message): input_files: global___AnalyzePlanRequest.InputFiles | None = ..., spark_version: global___AnalyzePlanRequest.SparkVersion | None = ..., ddl_parse: global___AnalyzePlanRequest.DDLParse | None = ..., + same_semantics: global___AnalyzePlanRequest.SameSemantics | None = ..., ) -> None: ... def HasField( self, @@ -393,6 +429,8 @@ class AnalyzePlanRequest(google.protobuf.message.Message): b"is_local", "is_streaming", b"is_streaming", + "same_semantics", + b"same_semantics", "schema", b"schema", "spark_version", @@ -424,6 +462,8 @@ class AnalyzePlanRequest(google.protobuf.message.Message): b"is_local", "is_streaming", b"is_streaming", + "same_semantics", + b"same_semantics", "schema", b"schema", "spark_version", @@ -450,6 +490,7 @@ class AnalyzePlanRequest(google.protobuf.message.Message): "input_files", "spark_version", "ddl_parse", + "same_semantics", ] | None: ... global___AnalyzePlanRequest = AnalyzePlanRequest @@ -583,6 +624,20 @@ class AnalyzePlanResponse(google.protobuf.message.Message): self, field_name: typing_extensions.Literal["parsed", b"parsed"] ) -> None: ... + class SameSemantics(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + RESULT_FIELD_NUMBER: builtins.int + result: builtins.bool + def __init__( + self, + *, + result: builtins.bool = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["result", b"result"] + ) -> None: ... + CLIENT_ID_FIELD_NUMBER: builtins.int SCHEMA_FIELD_NUMBER: builtins.int EXPLAIN_FIELD_NUMBER: builtins.int @@ -592,6 +647,7 @@ class AnalyzePlanResponse(google.protobuf.message.Message): INPUT_FILES_FIELD_NUMBER: builtins.int SPARK_VERSION_FIELD_NUMBER: builtins.int DDL_PARSE_FIELD_NUMBER: builtins.int + SAME_SEMANTICS_FIELD_NUMBER: builtins.int client_id: builtins.str @property def schema(self) -> global___AnalyzePlanResponse.Schema: ... @@ -609,6 +665,8 @@ class AnalyzePlanResponse(google.protobuf.message.Message): def spark_version(self) -> global___AnalyzePlanResponse.SparkVersion: ... @property def ddl_parse(self) -> global___AnalyzePlanResponse.DDLParse: ... + @property + def same_semantics(self) -> global___AnalyzePlanResponse.SameSemantics: ... def __init__( self, *, @@ -621,6 +679,7 @@ class AnalyzePlanResponse(google.protobuf.message.Message): input_files: global___AnalyzePlanResponse.InputFiles | None = ..., spark_version: global___AnalyzePlanResponse.SparkVersion | None = ..., ddl_parse: global___AnalyzePlanResponse.DDLParse | None = ..., + same_semantics: global___AnalyzePlanResponse.SameSemantics | None = ..., ) -> None: ... 
def HasField( self, @@ -637,6 +696,8 @@ class AnalyzePlanResponse(google.protobuf.message.Message): b"is_streaming", "result", b"result", + "same_semantics", + b"same_semantics", "schema", b"schema", "spark_version", @@ -662,6 +723,8 @@ class AnalyzePlanResponse(google.protobuf.message.Message): b"is_streaming", "result", b"result", + "same_semantics", + b"same_semantics", "schema", b"schema", "spark_version", @@ -681,6 +744,7 @@ class AnalyzePlanResponse(google.protobuf.message.Message): "input_files", "spark_version", "ddl_parse", + "same_semantics", ] | None: ... global___AnalyzePlanResponse = AnalyzePlanResponse diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index 68ac8b1dd7c2..806fe6e2329b 100644 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -2809,6 +2809,11 @@ def test_version(self): self.spark.version, ) + def test_same_semantics(self): + plan = self.connect.sql("SELECT 1") + other = self.connect.sql("SELECT 1") + self.assertTrue(plan.sameSemantics(other)) + def test_unsupported_functions(self): # SPARK-41225: Disable unsupported functions. df = self.connect.read.table(self.tbl_name) @@ -2825,7 +2830,6 @@ def test_unsupported_functions(self): "localCheckpoint", "_repr_html_", "semanticHash", - "sameSemantics", ): with self.assertRaises(NotImplementedError): getattr(df, f)() diff --git a/python/pyspark/sql/tests/connect/test_parity_dataframe.py b/python/pyspark/sql/tests/connect/test_parity_dataframe.py index 79626586f731..31dee6a19d21 100644 --- a/python/pyspark/sql/tests/connect/test_parity_dataframe.py +++ b/python/pyspark/sql/tests/connect/test_parity_dataframe.py @@ -60,8 +60,7 @@ def test_repartitionByRange_dataframe(self): def test_repr_behaviors(self): super().test_repr_behaviors() - # TODO(SPARK-41874): Implement DataFrame `sameSemantics` - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("Spark Connect does not have a SparkContext, but the tests depend on it.") def test_same_semantics_error(self): super().test_same_semantics_error()
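For reference, a minimal end-to-end sketch of what this change enables on the Python side (not part of the patch); the `sc://localhost` endpoint is an assumption, and any reachable Spark Connect server would do:

    from pyspark.sql import SparkSession

    # Connect to an assumed local Spark Connect server.
    spark = SparkSession.builder.remote("sc://localhost").getOrCreate()

    df1 = spark.sql("SELECT 1 AS a")
    df2 = spark.sql("SELECT 1 AS b")  # identical plan up to the attribute name

    # Attribute names are a tolerated cosmetic difference, so this comparison
    # is expected to hold; each call issues one AnalyzePlan RPC to the server.
    assert df1.sameSemantics(df2)

Unlike the in-process implementation, each comparison here is a network round trip, which is why the scaladoc above notes that it may not be fast.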