From 16d126f0a330d0304ed12de5bd6abd44aa6c9b22 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Fri, 14 Jul 2023 11:53:13 +0200 Subject: [PATCH 01/21] protos --- .../main/protobuf/spark/connect/base.proto | 36 ++++- python/pyspark/sql/connect/proto/base_pb2.py | 144 +++++++++--------- python/pyspark/sql/connect/proto/base_pb2.pyi | 92 ++++++++++- 3 files changed, 196 insertions(+), 76 deletions(-) diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index e869712858a3..541ef6b8d941 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -281,6 +281,12 @@ message ExecutePlanRequest { // server side. UserContext user_context = 2; + // (Optional) + // Provide an id for this request. If not provided, it will be generated by the server. + // It is returned in every ExecutePlanResponse.operation_id of the ExecutePlan response stream. + // The id must be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + optional string operation_id = 6; + // (Required) The logical plan to be executed / analyzed. Plan plan = 3; @@ -299,6 +305,10 @@ message ExecutePlanRequest { google.protobuf.Any extension = 999; } } + + // Tags to tag the given execution with. + // Used by Interrupt with interrupt.tag. + repeated string tags = 7; } // The response of a query, can be one or more for each request. Responses belonging to the @@ -306,6 +316,12 @@ message ExecutePlanResponse { string session_id = 1; + // Identifies the ExecutePlan execution. + // If set by the client in ExecutePlanRequest.operationId, that value is returned. + // Otherwise generated by the server. + // It is an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + string operation_id = 12; + // Union type for the different response messages. oneof response_type { ArrowBatch arrow_batch = 2; @@ -616,13 +632,31 @@ message InterruptRequest { enum InterruptType { INTERRUPT_TYPE_UNSPECIFIED = 0; - // Interrupt all running executions within session with provided session_id. + // Interrupt all running executions within the session with the provided session_id. INTERRUPT_TYPE_ALL = 1; + + // Interrupt all running executions within the session with the provided tag. + INTERRUPT_TYPE_TAG = 2; + + // Interrupt the running execution within the session with the provided id. + INTERRUPT_TYPE_ID = 3; + } + + oneof interrupt { + // if interrupt_type == INTERRUPT_TYPE_TAG, interrupt operation with this tag. + string operation_tag = 5; + + // if interrupt_type == INTERRUPT_TYPE_ID, interrupt operation with this operation_id. + string operation_id = 6; } } message InterruptResponse { + // Session id in which the interrupt was running. string session_id = 1; + + // Operation ids of the executions which were interrupted. + repeated string interrupted_ids = 2; } // Main interface for the SparkConnect service.
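Taken together, the new operation_id, tags, and interrupt fields let a client name an execution up front and later target it for interruption. Below is a minimal sketch of that flow against the generated proto builders; sessionId and plan stand in for values the client already holds, and the tag name is made up:

    import java.util.UUID
    import org.apache.spark.connect.proto

    // Start an execution with a client-chosen operation_id and a user-defined tag.
    val executeRequest = proto.ExecutePlanRequest.newBuilder()
      .setSessionId(sessionId)
      .setOperationId(UUID.randomUUID().toString) // must be a UUID string
      .addTags("nightly-etl")                     // matched later by INTERRUPT_TYPE_TAG
      .setPlan(plan)
      .build()

    // Later: interrupt every execution in the session that carries the tag.
    val interruptRequest = proto.InterruptRequest.newBuilder()
      .setSessionId(sessionId)
      .setInterruptType(proto.InterruptRequest.InterruptType.INTERRUPT_TYPE_TAG)
      .setOperationTag("nightly-etl")
      .build()

The InterruptResponse then carries interrupted_ids, the operation_ids of the executions that were actually interrupted.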
diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py index 7bf93ed58fa8..d210b26e3670 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.py +++ b/python/pyspark/sql/connect/proto/base_pb2.py @@ -37,7 +37,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf5\x12\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 
\x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\x99\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 
\x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\x85\x03\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x1aX\n\rRequestOption\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB\x0e\n\x0c_client_type"\xe5\r\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1a=\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1a`\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06valuesB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x84\x08\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"z\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xe7\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x01R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB\x0e\n\x0c_client_type"\xbc\x01\n\x14\x41\x64\x64\x41rtifactsResponse\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc3\x01\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB\x0e\n\x0c_client_type"\x8c\x02\n\x18\x41rtifactStatusesResponse\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01"\xc5\x02\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType"G\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x42\x0e\n\x0c_client_type"2\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId2\xa4\x04\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a .spark.connect.InterruptResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf5\x12\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t 
\x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\x99\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 
\x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xd2\x03\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1aX\n\rRequestOption\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\x88\x0e\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b 
\x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1a=\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1a`\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06valuesB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x84\x08\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"z\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xe7\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x01R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB\x0e\n\x0c_client_type"\xbc\x01\n\x14\x41\x64\x64\x41rtifactsResponse\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc3\x01\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB\x0e\n\x0c_client_type"\x8c\x02\n\x18\x41rtifactStatusesResponse\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01"\xcd\x03\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"v\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x15\n\x11INTERRUPT_TYPE_ID\x10\x03\x42\x0b\n\tinterruptB\x0e\n\x0c_client_type"[\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\'\n\x0finterrupted_ids\x18\x02 
\x03(\tR\x0einterruptedIds2\xa4\x04\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a .spark.connect.InterruptResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -115,75 +115,75 @@ _ANALYZEPLANRESPONSE_GETSTORAGELEVEL._serialized_start = 4482 _ANALYZEPLANRESPONSE_GETSTORAGELEVEL._serialized_end = 4565 _EXECUTEPLANREQUEST._serialized_start = 4578 - _EXECUTEPLANREQUEST._serialized_end = 4967 - _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_start = 4863 - _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_end = 4951 - _EXECUTEPLANRESPONSE._serialized_start = 4970 - _EXECUTEPLANRESPONSE._serialized_end = 6735 - _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 5966 - _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 6037 - _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 6039 - _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_end = 6100 - _EXECUTEPLANRESPONSE_METRICS._serialized_start = 6103 - _EXECUTEPLANRESPONSE_METRICS._serialized_end = 6620 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_start = 6198 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_end = 6530 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_start = 6407 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_end = 6530 - _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_start = 6532 - _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_end = 6620 - _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_start = 6622 - _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_end = 6718 - _KEYVALUE._serialized_start = 6737 - _KEYVALUE._serialized_end = 6802 - _CONFIGREQUEST._serialized_start = 6805 - _CONFIGREQUEST._serialized_end = 7833 - _CONFIGREQUEST_OPERATION._serialized_start = 7025 - _CONFIGREQUEST_OPERATION._serialized_end = 7523 - _CONFIGREQUEST_SET._serialized_start = 7525 - _CONFIGREQUEST_SET._serialized_end = 7577 - _CONFIGREQUEST_GET._serialized_start = 7579 - _CONFIGREQUEST_GET._serialized_end = 7604 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 7606 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 7669 - _CONFIGREQUEST_GETOPTION._serialized_start = 7671 - _CONFIGREQUEST_GETOPTION._serialized_end = 7702 - _CONFIGREQUEST_GETALL._serialized_start = 7704 - _CONFIGREQUEST_GETALL._serialized_end = 7752 - _CONFIGREQUEST_UNSET._serialized_start = 7754 - _CONFIGREQUEST_UNSET._serialized_end = 7781 - _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 7783 - _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 7817 - _CONFIGRESPONSE._serialized_start = 7835 - _CONFIGRESPONSE._serialized_end = 7957 - _ADDARTIFACTSREQUEST._serialized_start = 7960 - _ADDARTIFACTSREQUEST._serialized_end = 8831 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 8347 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 8400 - 
_ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 8402 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 8513 - _ADDARTIFACTSREQUEST_BATCH._serialized_start = 8515 - _ADDARTIFACTSREQUEST_BATCH._serialized_end = 8608 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 8611 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 8804 - _ADDARTIFACTSRESPONSE._serialized_start = 8834 - _ADDARTIFACTSRESPONSE._serialized_end = 9022 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 8941 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 9022 - _ARTIFACTSTATUSESREQUEST._serialized_start = 9025 - _ARTIFACTSTATUSESREQUEST._serialized_end = 9220 - _ARTIFACTSTATUSESRESPONSE._serialized_start = 9223 - _ARTIFACTSTATUSESRESPONSE._serialized_end = 9491 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 9334 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 9374 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 9376 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 9491 - _INTERRUPTREQUEST._serialized_start = 9494 - _INTERRUPTREQUEST._serialized_end = 9819 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 9732 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 9803 - _INTERRUPTRESPONSE._serialized_start = 9821 - _INTERRUPTRESPONSE._serialized_end = 9871 - _SPARKCONNECTSERVICE._serialized_start = 9874 - _SPARKCONNECTSERVICE._serialized_end = 10422 + _EXECUTEPLANREQUEST._serialized_end = 5044 + _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_start = 4923 + _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_end = 5011 + _EXECUTEPLANRESPONSE._serialized_start = 5047 + _EXECUTEPLANRESPONSE._serialized_end = 6847 + _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 6078 + _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 6149 + _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 6151 + _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_end = 6212 + _EXECUTEPLANRESPONSE_METRICS._serialized_start = 6215 + _EXECUTEPLANRESPONSE_METRICS._serialized_end = 6732 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_start = 6310 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_end = 6642 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_start = 6519 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_end = 6642 + _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_start = 6644 + _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_end = 6732 + _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_start = 6734 + _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_end = 6830 + _KEYVALUE._serialized_start = 6849 + _KEYVALUE._serialized_end = 6914 + _CONFIGREQUEST._serialized_start = 6917 + _CONFIGREQUEST._serialized_end = 7945 + _CONFIGREQUEST_OPERATION._serialized_start = 7137 + _CONFIGREQUEST_OPERATION._serialized_end = 7635 + _CONFIGREQUEST_SET._serialized_start = 7637 + _CONFIGREQUEST_SET._serialized_end = 7689 + _CONFIGREQUEST_GET._serialized_start = 7691 + _CONFIGREQUEST_GET._serialized_end = 7716 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 7718 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 7781 + _CONFIGREQUEST_GETOPTION._serialized_start = 7783 + _CONFIGREQUEST_GETOPTION._serialized_end = 7814 + _CONFIGREQUEST_GETALL._serialized_start = 7816 + _CONFIGREQUEST_GETALL._serialized_end = 7864 + _CONFIGREQUEST_UNSET._serialized_start = 7866 + _CONFIGREQUEST_UNSET._serialized_end = 7893 + _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 7895 + 
_CONFIGREQUEST_ISMODIFIABLE._serialized_end = 7929 + _CONFIGRESPONSE._serialized_start = 7947 + _CONFIGRESPONSE._serialized_end = 8069 + _ADDARTIFACTSREQUEST._serialized_start = 8072 + _ADDARTIFACTSREQUEST._serialized_end = 8943 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 8459 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 8512 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 8514 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 8625 + _ADDARTIFACTSREQUEST_BATCH._serialized_start = 8627 + _ADDARTIFACTSREQUEST_BATCH._serialized_end = 8720 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 8723 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 8916 + _ADDARTIFACTSRESPONSE._serialized_start = 8946 + _ADDARTIFACTSRESPONSE._serialized_end = 9134 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 9053 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 9134 + _ARTIFACTSTATUSESREQUEST._serialized_start = 9137 + _ARTIFACTSTATUSESREQUEST._serialized_end = 9332 + _ARTIFACTSTATUSESRESPONSE._serialized_start = 9335 + _ARTIFACTSTATUSESRESPONSE._serialized_end = 9603 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 9446 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 9486 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 9488 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 9603 + _INTERRUPTREQUEST._serialized_start = 9606 + _INTERRUPTREQUEST._serialized_end = 10067 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 9920 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 10038 + _INTERRUPTRESPONSE._serialized_start = 10069 + _INTERRUPTRESPONSE._serialized_end = 10160 + _SPARKCONNECTSERVICE._serialized_start = 10163 + _SPARKCONNECTSERVICE._serialized_end = 10711 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index 633058f33ed4..728a23d6e879 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -1031,9 +1031,11 @@ class ExecutePlanRequest(google.protobuf.message.Message): SESSION_ID_FIELD_NUMBER: builtins.int USER_CONTEXT_FIELD_NUMBER: builtins.int + OPERATION_ID_FIELD_NUMBER: builtins.int PLAN_FIELD_NUMBER: builtins.int CLIENT_TYPE_FIELD_NUMBER: builtins.int REQUEST_OPTIONS_FIELD_NUMBER: builtins.int + TAGS_FIELD_NUMBER: builtins.int session_id: builtins.str """(Required) @@ -1048,6 +1050,12 @@ class ExecutePlanRequest(google.protobuf.message.Message): user_context.user_id and session+id both identify a unique remote spark session on the server side. """ + operation_id: builtins.str + """(Optional) + Provide an id for this request. If not provided, it will be generated by the server. + It is returned in every ExecutePlanResponse.operation_id of the ExecutePlan response stream. + The id must be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + """ @property def plan(self) -> global___Plan: """(Required) The logical plan to be executed / analyzed.""" @@ -1065,23 +1073,36 @@ class ExecutePlanRequest(google.protobuf.message.Message): """Repeated element for options that can be passed to the request. This element is currently unused but allows to pass in an extension value used for arbitrary options. """ + @property + def tags( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """Tags to tag the given execution with. 
+ Used by Interrupt with interrupt.tag. + """ def __init__( self, *, session_id: builtins.str = ..., user_context: global___UserContext | None = ..., + operation_id: builtins.str | None = ..., plan: global___Plan | None = ..., client_type: builtins.str | None = ..., request_options: collections.abc.Iterable[global___ExecutePlanRequest.RequestOption] | None = ..., + tags: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_client_type", b"_client_type", + "_operation_id", + b"_operation_id", "client_type", b"client_type", + "operation_id", + b"operation_id", "plan", b"plan", "user_context", @@ -1093,21 +1114,32 @@ class ExecutePlanRequest(google.protobuf.message.Message): field_name: typing_extensions.Literal[ "_client_type", b"_client_type", + "_operation_id", + b"_operation_id", "client_type", b"client_type", + "operation_id", + b"operation_id", "plan", b"plan", "request_options", b"request_options", "session_id", b"session_id", + "tags", + b"tags", "user_context", b"user_context", ], ) -> None: ... + @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_client_type", b"_client_type"] ) -> typing_extensions.Literal["client_type"] | None: ... + @typing.overload + def WhichOneof( + self, oneof_group: typing_extensions.Literal["_operation_id", b"_operation_id"] + ) -> typing_extensions.Literal["operation_id"] | None: ... global___ExecutePlanRequest = ExecutePlanRequest @@ -1290,6 +1322,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): ) -> None: ... SESSION_ID_FIELD_NUMBER: builtins.int + OPERATION_ID_FIELD_NUMBER: builtins.int ARROW_BATCH_FIELD_NUMBER: builtins.int SQL_COMMAND_RESULT_FIELD_NUMBER: builtins.int WRITE_STREAM_OPERATION_START_RESULT_FIELD_NUMBER: builtins.int @@ -1301,6 +1334,12 @@ class ExecutePlanResponse(google.protobuf.message.Message): OBSERVED_METRICS_FIELD_NUMBER: builtins.int SCHEMA_FIELD_NUMBER: builtins.int session_id: builtins.str + operation_id: builtins.str + """Identifies the ExecutePlan execution. + If set by the client in ExecutePlanRequest.operationId, that value is returned. + Otherwise generated by the server. + It is an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + """ @property def arrow_batch(self) -> global___ExecutePlanResponse.ArrowBatch: ... 
@property @@ -1348,6 +1387,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): self, *, session_id: builtins.str = ..., + operation_id: builtins.str = ..., arrow_batch: global___ExecutePlanResponse.ArrowBatch | None = ..., sql_command_result: global___ExecutePlanResponse.SqlCommandResult | None = ..., write_stream_operation_start_result: pyspark.sql.connect.proto.commands_pb2.WriteStreamOperationStartResult @@ -1402,6 +1442,8 @@ class ExecutePlanResponse(google.protobuf.message.Message): b"metrics", "observed_metrics", b"observed_metrics", + "operation_id", + b"operation_id", "response_type", b"response_type", "schema", @@ -2208,17 +2250,27 @@ class InterruptRequest(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor INTERRUPT_TYPE_UNSPECIFIED: InterruptRequest._InterruptType.ValueType # 0 INTERRUPT_TYPE_ALL: InterruptRequest._InterruptType.ValueType # 1 - """Interrupt all running executions within session with provided session_id.""" + """Interrupt all running executions within the session with the provided session_id.""" + INTERRUPT_TYPE_TAG: InterruptRequest._InterruptType.ValueType # 2 + """Interrupt all running executions within the session with the provided tag.""" + INTERRUPT_TYPE_ID: InterruptRequest._InterruptType.ValueType # 3 + """Interrupt the running execution within the session with the provided id.""" class InterruptType(_InterruptType, metaclass=_InterruptTypeEnumTypeWrapper): ... INTERRUPT_TYPE_UNSPECIFIED: InterruptRequest.InterruptType.ValueType # 0 INTERRUPT_TYPE_ALL: InterruptRequest.InterruptType.ValueType # 1 - """Interrupt all running executions within session with provided session_id.""" + """Interrupt all running executions within the session with the provided session_id.""" + INTERRUPT_TYPE_TAG: InterruptRequest.InterruptType.ValueType # 2 + """Interrupt all running executions within the session with the provided tag.""" + INTERRUPT_TYPE_ID: InterruptRequest.InterruptType.ValueType # 3 + """Interrupt the running execution within the session with the provided id.""" SESSION_ID_FIELD_NUMBER: builtins.int USER_CONTEXT_FIELD_NUMBER: builtins.int CLIENT_TYPE_FIELD_NUMBER: builtins.int INTERRUPT_TYPE_FIELD_NUMBER: builtins.int + OPERATION_TAG_FIELD_NUMBER: builtins.int + OPERATION_ID_FIELD_NUMBER: builtins.int session_id: builtins.str """(Required) @@ -2236,6 +2288,10 @@ class InterruptRequest(google.protobuf.message.Message): """ interrupt_type: global___InterruptRequest.InterruptType.ValueType """(Required) The type of interrupt to execute.""" + operation_tag: builtins.str + """if interrupt_type == INTERRUPT_TYPE_TAG, interrupt operation with this tag.""" + operation_id: builtins.str + """if interrupt_type == INTERRUPT_TYPE_ID, interrupt operation with this operation_id.""" def __init__( self, *, @@ -2243,6 +2299,8 @@ class InterruptRequest(google.protobuf.message.Message): user_context: global___UserContext | None = ..., client_type: builtins.str | None = ..., interrupt_type: global___InterruptRequest.InterruptType.ValueType = ..., + operation_tag: builtins.str = ..., + operation_id: builtins.str = ..., ) -> None: ...
def HasField( self, @@ -2251,6 +2309,12 @@ class InterruptRequest(google.protobuf.message.Message): b"_client_type", "client_type", b"client_type", + "interrupt", + b"interrupt", + "operation_id", + b"operation_id", + "operation_tag", + b"operation_tag", "user_context", b"user_context", ], @@ -2262,17 +2326,28 @@ class InterruptRequest(google.protobuf.message.Message): b"_client_type", "client_type", b"client_type", + "interrupt", + b"interrupt", "interrupt_type", b"interrupt_type", + "operation_id", + b"operation_id", + "operation_tag", + b"operation_tag", "session_id", b"session_id", "user_context", b"user_context", ], ) -> None: ... + @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_client_type", b"_client_type"] ) -> typing_extensions.Literal["client_type"] | None: ... + @typing.overload + def WhichOneof( + self, oneof_group: typing_extensions.Literal["interrupt", b"interrupt"] ) -> typing_extensions.Literal["operation_tag", "operation_id"] | None: ... global___InterruptRequest = InterruptRequest @@ -2280,14 +2355,25 @@ class InterruptResponse(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor SESSION_ID_FIELD_NUMBER: builtins.int + INTERRUPTED_IDS_FIELD_NUMBER: builtins.int session_id: builtins.str + """Session id in which the interrupt was running.""" + @property + def interrupted_ids( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """Operation ids of the executions which were interrupted.""" def __init__( self, *, session_id: builtins.str = ..., + interrupted_ids: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... def ClearField( - self, field_name: typing_extensions.Literal["session_id", b"session_id"] + self, + field_name: typing_extensions.Literal[ + "interrupted_ids", b"interrupted_ids", "session_id", b"session_id" + ], ) -> None: ... global___InterruptResponse = InterruptResponse From 6d073ccdb0a62ef2db77737113e73a844b7d9848 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Fri, 14 Jul 2023 12:40:21 +0200 Subject: [PATCH 02/21] error class --- .../main/resources/error/error-classes.json | 18 ++++++++++ ...r-conditions-invalid-handle-error-class.md | 36 +++++++++++++++++++ docs/sql-error-conditions.md | 8 +++++ 3 files changed, 62 insertions(+) create mode 100644 docs/sql-error-conditions-invalid-handle-error-class.md diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 4debf3da0b81..8e1c9889ad27 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -1383,6 +1383,24 @@ ], "sqlState" : "22023" }, + "INVALID_HANDLE" : { + "message" : [ + "The handle <handle> is invalid." + ], + "subClass" : { + "ALREADY_EXISTS" : { + "message" : [ + "Handle already exists." + ] + }, + "FORMAT" : { + "message" : [ + "Handle has invalid format. Handle must be an UUID string of the format '00112233-4455-6677-8899-aabbccddeeff'" + ] + } + }, + "sqlState" : "HY000" + }, "INVALID_HIVE_COLUMN_NAME" : { "message" : [ "Cannot create the table having the nested column whose name contains invalid characters in Hive metastore."
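The FORMAT and ALREADY_EXISTS sub-classes cover a client-supplied operation_id that either fails UUID parsing or collides with an existing execution. A minimal sketch of the validation pattern behind INVALID_HANDLE.FORMAT, using a hypothetical helper (the real check is added to SessionHolder in the next commit):

    import java.util.UUID
    import org.apache.spark.SparkSQLException

    // Hypothetical helper: normalize a client-provided handle or raise INVALID_HANDLE.FORMAT.
    def validateHandle(handle: String): String = {
      try {
        UUID.fromString(handle).toString
      } catch {
        case _: IllegalArgumentException =>
          throw new SparkSQLException(
            errorClass = "INVALID_HANDLE.FORMAT",
            messageParameters = Map("handle" -> handle))
      }
    }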
diff --git a/docs/sql-error-conditions-invalid-handle-error-class.md b/docs/sql-error-conditions-invalid-handle-error-class.md new file mode 100644 index 000000000000..7a1ce4c011d7 --- /dev/null +++ b/docs/sql-error-conditions-invalid-handle-error-class.md @@ -0,0 +1,36 @@ +--- +layout: global +title: INVALID_HANDLE error class +displayTitle: INVALID_HANDLE error class +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + +SQLSTATE: HY000 + +The handle `<handle>` is invalid. + +This error class has the following derived error classes: + +## ALREADY_EXISTS + +Handle already exists. + +## FORMAT + +Handle has invalid format. Handle must be an UUID string of the format '00112233-4455-6677-8899-aabbccddeeff' + + diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 00fe6d75f538..d6c2b544b51c 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -910,6 +910,14 @@ For more details see [INVALID_FORMAT](sql-error-conditions-invalid-format-error- The fraction of sec must be zero. Valid range is [0, 60]. If necessary set `` to "false" to bypass this error. +### [INVALID_HANDLE](sql-error-conditions-invalid-handle-error-class.html) + +SQLSTATE: HY000 + +The handle `<handle>` is invalid.
+ +For more details see [INVALID_HANDLE](sql-error-conditions-invalid-handle-error-class.html) + ### INVALID_HIVE_COLUMN_NAME SQLSTATE: none assigned From 380a373a160503f2f466ae41122441689318cd79 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Fri, 14 Jul 2023 14:40:41 +0200 Subject: [PATCH 03/21] server side changes --- .../sql/connect/service/ExecuteHolder.scala | 27 +++++++ .../sql/connect/service/SessionHolder.scala | 70 +++++++++++++++---- .../SparkConnectInterruptHandler.scala | 23 +++++- .../scala/org/apache/spark/SparkContext.scala | 1 + 4 files changed, 103 insertions(+), 18 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index 1f70973b60e0..6f357566c578 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.connect.service +import org.apache.spark.SparkContext import org.apache.spark.connect.proto import org.apache.spark.internal.Logging import org.apache.spark.sql.connect.execution.{ExecuteGrpcResponseSender, ExecuteResponseObserver, ExecuteThreadRunner} @@ -37,6 +38,11 @@ private[connect] class ExecuteHolder( s"Session_${sessionHolder.sessionId}_" + s"Request_${operationId}" + val userDefinedTags: Seq[String] = request.getTagsList().asScala.map { tag => + throwIfInvalidTag(tag) + tag + } + val session = sessionHolder.session val responseObserver: ExecuteResponseObserver[proto.ExecutePlanResponse] = @@ -89,4 +95,25 @@ private[connect] class ExecuteHolder( def interrupt(): Unit = { runner.interrupt() } + + def tagToSparkJobTag(tag: String): String = { + "SparkConnectUserDefinedTag_" + + s"User_${sessionHolder.userId}_Session_${sessionHolder.sessionId}_Tag_${tag}" + } + + private def throwIfInvalidTag(tag: String) = { + // Same format rules apply to Spark Connect execution tags as to SparkContext job tags. + // see SparkContext.throwIfInvalidTag.
+ if (tag == null) { + throw new IllegalArgumentException("Spark Connect execution tag cannot be null.") + } + if (tag.contains(SparkContext.SPARK_JOB_TAGS_SEP)) { + throw new IllegalArgumentException( + s"Spark Connect execution tag cannot contain '${SparkContext.SPARK_JOB_TAGS_SEP}'.") + } + if (tag.isEmpty) { + throw new IllegalArgumentException( + "Spark Connect execution tag cannot be an empty string.") + } + } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala index 5ac4f6db82aa..629ff5ff8e8f 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala @@ -22,10 +22,9 @@ import java.util.UUID import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap} import scala.collection.JavaConverters._ -import scala.util.control.NonFatal +import scala.collection.mutable -import org.apache.spark.JobArtifactSet -import org.apache.spark.SparkException +import org.apache.spark.{JobArtifactSet, SparkException, SparkSQLException} import org.apache.spark.connect.proto import org.apache.spark.internal.Logging import org.apache.spark.sql.DataFrame @@ -56,10 +55,29 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio private lazy val listenerCache: ConcurrentMap[String, StreamingQueryListener] = new ConcurrentHashMap() - private[connect] def createExecuteHolder(request: proto.ExecutePlanRequest): ExecuteHolder = { - val operationId = UUID.randomUUID().toString - val executePlanHolder = new ExecuteHolder(request, operationId, this) - assert(executions.putIfAbsent(operationId, executePlanHolder) == null) + private[connect] def createExecutePlanHolder( + request: proto.ExecutePlanRequest): ExecutePlanHolder = { + val operationId = if (request.hasOperationId) { + try { + UUID.fromString(request.getOperationId).toString + } catch { + case _: IllegalArgumentException => + throw new SparkSQLException( + errorClass = "INVALID_HANDLE.FORMAT", + messageParameters = Map("handle" -> request.getOperationId) + ) + } + } else { + UUID.randomUUID().toString + } + val executePlanHolder = ExecutePlanHolder(operationId, this, request) + val oldExecute = executePlanOperations.putIfAbsent(operationId, executePlanHolder) + if (oldExecute != null) { + throw new SparkSQLException( + errorClass = "INVALID_HANDLE.ALREADY_EXISTS", + messageParameters = Map("handle" -> operationId) + ) + } executePlanHolder } @@ -71,17 +89,39 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio executions.remove(operationId) } - private[connect] def interruptAll(): Unit = { - executions.asScala.values.foreach { execute => - // Eat exception while trying to interrupt a given execution and move forward. - try { - logDebug(s"Interrupting execution ${execute.operationId}") + /** Interrupt all executions in the session. + * @return list of operationIds of interrupted executions */ + private[connect] def interruptAll(): Seq[String] = { + val interruptedIds = new mutable.ArrayBuffer[String]() + executePlanOperations.asScala.values.foreach { execute => + interruptedIds += execute.operationId + execute.interrupt() + } + interruptedIds.toSeq + } + + /** Interrupt executions in the session with a given tag. 
+ * @return list of operationIds of interrupted executions */ + private[connect] def interruptTag(tag: String): Seq[String] = { + val interruptedIds = new mutable.ArrayBuffer[String]() + executePlanOperations.asScala.values.foreach { execute => + if (execute.userDefinedTags.contains(tag)) { + interruptedIds += execute.operationId execute.interrupt() - } catch { - case NonFatal(e) => - logWarning(s"Exception $e while trying to interrupt execution ${execute.operationId}") } } + interruptedIds.toSeq + } + + /** Interrupt the execution with the given operation_id + * @return list of operationIds of interrupted executions (one element or empty) */ + private[connect] def interruptOperation(operationId: String): Seq[String] = { + val interruptedIds = new mutable.ArrayBuffer[String]() + Option(executePlanOperations.get(operationId)).foreach { execute => + interruptedIds += execute.operationId + execute.interrupt() + } + interruptedIds.toSeq } private[connect] lazy val artifactManager = new SparkConnectArtifactManager(this) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala index b0923e277e42..86e91bf6ca38 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.connect.service +import scala.collection.JavaConverters._ + import io.grpc.stub.StreamObserver import org.apache.spark.connect.proto @@ -30,16 +32,31 @@ class SparkConnectInterruptHandler(responseObserver: StreamObserver[proto.Interr SparkConnectService .getOrCreateIsolatedSession(v.getUserContext.getUserId, v.getSessionId) - v.getInterruptType match { + val interruptedIds = v.getInterruptType match { case proto.InterruptRequest.InterruptType.INTERRUPT_TYPE_ALL => sessionHolder.interruptAll() + case proto.InterruptRequest.InterruptType.INTERRUPT_TYPE_TAG => + if (!v.hasOperationTag) { + throw new IllegalArgumentException( + s"INTERRUPT_TYPE_TAG requested, but no operation_tag provided.") + } + sessionHolder.interruptTag(v.getOperationTag) + case proto.InterruptRequest.InterruptType.INTERRUPT_TYPE_ID => + if (!v.hasOperationId) { + throw new IllegalArgumentException( + s"INTERRUPT_TYPE_ID requested, but no operation_id provided.") + } + sessionHolder.interruptTag(v.getOperationId) case other => throw new UnsupportedOperationException(s"Unknown InterruptType $other!") } - val builder = proto.InterruptResponse.newBuilder().setSessionId(v.getSessionId) + val response = proto.InterruptResponse.newBuilder() + .setSessionId(v.getSessionId) + .addAllInterruptedIds(interruptedIds.asJava) + .build() - responseObserver.onNext(builder.build()) + responseObserver.onNext(response) responseObserver.onCompleted() } } diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 80f7eaf00ed2..26fdb86d2990 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -2975,6 +2975,7 @@ object SparkContext extends Logging { /** Separator of tags in SPARK_JOB_TAGS property */ private[spark] val SPARK_JOB_TAGS_SEP = "," + // Same rules apply to Spark Connect execution tags, see 
ExecuteHolder.throwIfInvalidTag private[spark] def throwIfInvalidTag(tag: String) = { if (tag == null) { throw new IllegalArgumentException("Spark job tag cannot be null.") From 4d9ad5349784261c3a4dc6aaa953e54f32a3f94b Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Fri, 14 Jul 2023 19:32:45 +0200 Subject: [PATCH 04/21] clean after merge --- .../spark/sql/connect/service/ExecuteHolder.scala | 2 ++ .../spark/sql/connect/service/SessionHolder.scala | 13 ++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index 6f357566c578..74baf4c220cf 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.connect.service +import scala.collection.JavaConverters._ + import org.apache.spark.SparkContext import org.apache.spark.connect.proto import org.apache.spark.internal.Logging diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala index 629ff5ff8e8f..6652cef2e80e 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala @@ -55,8 +55,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio private lazy val listenerCache: ConcurrentMap[String, StreamingQueryListener] = new ConcurrentHashMap() - private[connect] def createExecutePlanHolder( - request: proto.ExecutePlanRequest): ExecutePlanHolder = { + private[connect] def createExecuteHolder(request: proto.ExecutePlanRequest): ExecuteHolder = { val operationId = if (request.hasOperationId) { try { UUID.fromString(request.getOperationId).toString @@ -70,8 +69,8 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio } else { UUID.randomUUID().toString } - val executePlanHolder = ExecutePlanHolder(operationId, this, request) - val oldExecute = executePlanOperations.putIfAbsent(operationId, executePlanHolder) + val executePlanHolder = new ExecuteHolder(request, operationId, this) + val oldExecute = executions.putIfAbsent(operationId, executePlanHolder) if (oldExecute != null) { throw new SparkSQLException( errorClass = "INVALID_HANDLE.ALREADY_EXISTS", @@ -93,7 +92,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio * @return list of operationIds of interrupted executions */ private[connect] def interruptAll(): Seq[String] = { val interruptedIds = new mutable.ArrayBuffer[String]() - executePlanOperations.asScala.values.foreach { execute => + executions.asScala.values.foreach { execute => interruptedIds += execute.operationId execute.interrupt() } @@ -104,7 +103,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio * @return list of operationIds of interrupted executions */ private[connect] def interruptTag(tag: String): Seq[String] = { val interruptedIds = new mutable.ArrayBuffer[String]() - executePlanOperations.asScala.values.foreach { execute => + executions.asScala.values.foreach { execute => if 
(execute.userDefinedTags.contains(tag)) { interruptedIds += execute.operationId execute.interrupt() @@ -117,7 +116,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio * @return list of operationIds of interrupted executions (one element or empty) */ private[connect] def interruptOperation(operationId: String): Seq[String] = { val interruptedIds = new mutable.ArrayBuffer[String]() - Option(executePlanOperations.get(operationId)).foreach { execute => + Option(executions.get(operationId)).foreach { execute => interruptedIds += execute.operationId execute.interrupt() } From 6095e6b01aa6dfcb6d43051a08fe41780f9736fd Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Fri, 14 Jul 2023 23:31:23 +0200 Subject: [PATCH 05/21] lint --- .../main/protobuf/spark/connect/base.proto | 6 ++--- .../sql/connect/service/ExecuteHolder.scala | 3 +-- .../sql/connect/service/SessionHolder.scala | 27 ++++++++++++------- .../SparkConnectInterruptHandler.scala | 5 ++-- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index 541ef6b8d941..35e472bd6492 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -632,13 +632,13 @@ message InterruptRequest { enum InterruptType { INTERRUPT_TYPE_UNSPECIFIED = 0; - // Interrupt all running executions within the session with provided the session_id. + // Interrupt all running executions within the session with the provided session_id. INTERRUPT_TYPE_ALL = 1; - // Interrupt all running executions within the session with the provided tag. + // Interrupt all running executions within the session with the provided operation_tag. INTERRUPT_TYPE_TAG = 2; - // Interrupt the running execution within the session with the provided id. + // Interrupt the running execution within the session with the provided operation_id. 
INTERRUPT_TYPE_ID = 3; } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index 74baf4c220cf..d9420b4a0751 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -114,8 +114,7 @@ private[connect] class ExecuteHolder( s"Spark Connect execution tag cannot contain '${SparkContext.SPARK_JOB_TAGS_SEP}'.") } if (tag.isEmpty) { - throw new IllegalArgumentException( - "Spark Connect execution tag cannot be an empty string.") + throw new IllegalArgumentException("Spark Connect execution tag cannot be an empty string.") } } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala index 6652cef2e80e..3b46adab954a 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala @@ -63,8 +63,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio case _: IllegalArgumentException => throw new SparkSQLException( errorClass = "INVALID_HANDLE.FORMAT", - messageParameters = Map("handle" -> request.getOperationId) - ) + messageParameters = Map("handle" -> request.getOperationId)) } } else { UUID.randomUUID().toString @@ -74,8 +73,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio if (oldExecute != null) { throw new SparkSQLException( errorClass = "INVALID_HANDLE.ALREADY_EXISTS", - messageParameters = Map("handle" -> operationId) - ) + messageParameters = Map("handle" -> operationId)) } executePlanHolder } @@ -88,8 +86,11 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio executions.remove(operationId) } - /** Interrupt all executions in the session. - * @return list of operationIds of interrupted executions */ + /** + * Interrupt all executions in the session. + * @return + * list of operationIds of interrupted executions + */ private[connect] def interruptAll(): Seq[String] = { val interruptedIds = new mutable.ArrayBuffer[String]() executions.asScala.values.foreach { execute => @@ -99,8 +100,11 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio interruptedIds.toSeq } - /** Interrupt executions in the session with a given tag. - * @return list of operationIds of interrupted executions */ + /** + * Interrupt executions in the session with a given tag. 
+ * @return + * list of operationIds of interrupted executions + */ private[connect] def interruptTag(tag: String): Seq[String] = { val interruptedIds = new mutable.ArrayBuffer[String]() executions.asScala.values.foreach { execute => @@ -112,8 +116,11 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio interruptedIds.toSeq } - /** Interrupt the execution with the given operation_id - * @return list of operationIds of interrupted executions (one element or empty) */ + /** + * Interrupt the execution with the given operation_id + * @return + * list of operationIds of interrupted executions (one element or empty) + */ private[connect] def interruptOperation(operationId: String): Seq[String] = { val interruptedIds = new mutable.ArrayBuffer[String]() Option(executions.get(operationId)).foreach { execute => diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala index 86e91bf6ca38..e05f8f73fc73 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala @@ -46,12 +46,13 @@ class SparkConnectInterruptHandler(responseObserver: StreamObserver[proto.Interr throw new IllegalArgumentException( s"INTERRUPT_TYPE_ID requested, but no operation_id provided.") } - sessionHolder.interruptTag(v.getOperationId) + sessionHolder.interruptOperation(v.getOperationId) case other => throw new UnsupportedOperationException(s"Unknown InterruptType $other!") } - val response = proto.InterruptResponse.newBuilder() + val response = proto.InterruptResponse + .newBuilder() .setSessionId(v.getSessionId) .addAllInterruptedIds(interruptedIds.asJava) .build() From 65e9cf5a336ba045dfc48dad0455cc3b4122a730 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Fri, 14 Jul 2023 23:37:10 +0200 Subject: [PATCH 06/21] fix error doc --- docs/sql-error-conditions-invalid-handle-error-class.md | 2 +- docs/sql-error-conditions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql-error-conditions-invalid-handle-error-class.md b/docs/sql-error-conditions-invalid-handle-error-class.md index 7a1ce4c011d7..7c083bc5f50c 100644 --- a/docs/sql-error-conditions-invalid-handle-error-class.md +++ b/docs/sql-error-conditions-invalid-handle-error-class.md @@ -19,7 +19,7 @@ license: | limitations under the License. --- -SQLSTATE: HY000 +[SQLSTATE: HY000](sql-error-conditions-sqlstates.html#class-HY-cli-specific-condition) The handle `` is invalid. diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index d6c2b544b51c..a7bdf58f1b18 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -912,7 +912,7 @@ The fraction of sec must be zero. Valid range is [0, 60]. If necessary set `` is invalid. 
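To make the new interrupt paths concrete, a raw gRPC client could exercise the tag-based interrupt and read back the interrupted operation ids roughly as follows. This is a sketch only: it assumes an existing `channel` and `sessionId`, and a `stub` created from the generated service (for example `proto.SparkConnectServiceGrpc.newBlockingStub(channel)`), rather than going through the Scala client wrapper.

    import scala.collection.JavaConverters._
    import org.apache.spark.connect.proto

    // Ask the server to interrupt every execution in this session tagged "nightly-etl".
    val request = proto.InterruptRequest
      .newBuilder()
      .setSessionId(sessionId)
      .setInterruptType(proto.InterruptRequest.InterruptType.INTERRUPT_TYPE_TAG)
      .setOperationTag("nightly-etl")
      .build()

    val response = stub.interrupt(request)

    // interrupted_ids carries the operation ids of the executions that were interrupted.
    response.getInterruptedIdsList.asScala.foreach(id => println(s"interrupted operation $id"))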
From ae6cbaf126bb7a1ace0b47260694969829696607 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Mon, 17 Jul 2023 17:45:37 +0200 Subject: [PATCH 07/21] fix after rebase --- .../spark/sql/connect/service/ExecuteHolder.scala | 2 +- python/pyspark/sql/connect/proto/base_pb2.pyi | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index d9420b4a0751..c90f2a570b56 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -40,7 +40,7 @@ private[connect] class ExecuteHolder( s"Session_${sessionHolder.sessionId}_" + s"Request_${operationId}" - val userDefinedTags: Seq[String] = request.getTagsList().asScala.map { tag => + val userDefinedTags: Seq[String] = request.getTagsList().asScala.toSeq.map { tag => throwIfInvalidTag(tag) tag } diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index 728a23d6e879..0533ad6a4837 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -2250,20 +2250,20 @@ class InterruptRequest(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor INTERRUPT_TYPE_UNSPECIFIED: InterruptRequest._InterruptType.ValueType # 0 INTERRUPT_TYPE_ALL: InterruptRequest._InterruptType.ValueType # 1 - """Interrupt all running executions within the session with provided the session_id.""" + """Interrupt all running executions within the session with the provided session_id.""" INTERRUPT_TYPE_TAG: InterruptRequest._InterruptType.ValueType # 2 - """Interrupt all running executions within the session with the provided tag.""" + """Interrupt all running executions within the session with the provided operation_tag.""" INTERRUPT_TYPE_ID: InterruptRequest._InterruptType.ValueType # 3 - """Interrupt the running execution within the session with the provided id.""" + """Interrupt the running execution within the session with the provided operation_id.""" class InterruptType(_InterruptType, metaclass=_InterruptTypeEnumTypeWrapper): ... 
INTERRUPT_TYPE_UNSPECIFIED: InterruptRequest.InterruptType.ValueType # 0 INTERRUPT_TYPE_ALL: InterruptRequest.InterruptType.ValueType # 1 - """Interrupt all running executions within the session with provided the session_id.""" + """Interrupt all running executions within the session with the provided session_id.""" INTERRUPT_TYPE_TAG: InterruptRequest.InterruptType.ValueType # 2 - """Interrupt all running executions within the session with the provided tag.""" + """Interrupt all running executions within the session with the provided operation_tag.""" INTERRUPT_TYPE_ID: InterruptRequest.InterruptType.ValueType # 3 - """Interrupt the running execution within the session with the provided id.""" + """Interrupt the running execution within the session with the provided operation_id.""" SESSION_ID_FIELD_NUMBER: builtins.int USER_CONTEXT_FIELD_NUMBER: builtins.int From b213c008dcb2d8703fb60a51d74fd26d2af61cce Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Tue, 18 Jul 2023 13:15:13 +0200 Subject: [PATCH 08/21] INTERRUPT_TYPE_ID -> INTERRUPT_TYPE_OPERATION_ID --- .../src/main/protobuf/spark/connect/base.proto | 4 ++-- .../service/SparkConnectInterruptHandler.scala | 4 ++-- python/pyspark/sql/connect/proto/base_pb2.py | 16 ++++++++-------- python/pyspark/sql/connect/proto/base_pb2.pyi | 6 +++--- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index 35e472bd6492..5449eb56d721 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -639,14 +639,14 @@ message InterruptRequest { INTERRUPT_TYPE_TAG = 2; // Interrupt the running execution within the session with the provided operation_id. - INTERRUPT_TYPE_ID = 3; + INTERRUPT_TYPE_OPERATION_ID = 3; } oneof interrupt { // if interrupt_tag == INTERRUPT_TYPE_TAG, interrupt operation with this tag. string operation_tag = 5; - // if interrupt_tag == INTERRUPT_TYPE_ID, interrupt operation with this operation_id. + // if interrupt_tag == INTERRUPT_TYPE_OPERATION_ID, interrupt operation with this operation_id. 
string operation_id = 6; } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala index e05f8f73fc73..a9ed391460ca 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterruptHandler.scala @@ -41,10 +41,10 @@ class SparkConnectInterruptHandler(responseObserver: StreamObserver[proto.Interr s"INTERRUPT_TYPE_TAG requested, but no operation_tag provided.") } sessionHolder.interruptTag(v.getOperationTag) - case proto.InterruptRequest.InterruptType.INTERRUPT_TYPE_ID => + case proto.InterruptRequest.InterruptType.INTERRUPT_TYPE_OPERATION_ID => if (!v.hasOperationId) { throw new IllegalArgumentException( - s"INTERRUPT_TYPE_ID requested, but no operation_id provided.") + s"INTERRUPT_TYPE_OPERATION_ID requested, but no operation_id provided.") } sessionHolder.interruptOperation(v.getOperationId) case other => diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py index d210b26e3670..04044d4cdcf3 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.py +++ b/python/pyspark/sql/connect/proto/base_pb2.py @@ -37,7 +37,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf5\x12\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e 
\x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\x99\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b 
\x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xd2\x03\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1aX\n\rRequestOption\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\x88\x0e\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1a=\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1a`\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06valuesB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x84\x08\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"z\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xe7\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x01R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 
\x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB\x0e\n\x0c_client_type"\xbc\x01\n\x14\x41\x64\x64\x41rtifactsResponse\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc3\x01\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB\x0e\n\x0c_client_type"\x8c\x02\n\x18\x41rtifactStatusesResponse\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01"\xcd\x03\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"v\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x15\n\x11INTERRUPT_TYPE_ID\x10\x03\x42\x0b\n\tinterruptB\x0e\n\x0c_client_type"[\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds2\xa4\x04\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a 
.spark.connect.InterruptResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf5\x12\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 
\x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\x99\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 
\x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xd2\x03\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1aX\n\rRequestOption\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\x88\x0e\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1a=\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1a`\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06valuesB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 
\x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x84\x08\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"z\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xe7\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x01R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB\x0e\n\x0c_client_type"\xbc\x01\n\x14\x41\x64\x64\x41rtifactsResponse\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 
\x01(\x08R\x0fisCrcSuccessful"\xc3\x01\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB\x0e\n\x0c_client_type"\x8c\x02\n\x18\x41rtifactStatusesResponse\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01"\xd8\x03\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB\x0e\n\x0c_client_type"[\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds2\xa4\x04\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a .spark.connect.InterruptResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -179,11 +179,11 @@ _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 9488 _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 9603 _INTERRUPTREQUEST._serialized_start = 9606 - _INTERRUPTREQUEST._serialized_end = 10067 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 9920 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 10038 - _INTERRUPTRESPONSE._serialized_start = 10069 - _INTERRUPTRESPONSE._serialized_end = 10160 - _SPARKCONNECTSERVICE._serialized_start = 10163 - _SPARKCONNECTSERVICE._serialized_end = 10711 + _INTERRUPTREQUEST._serialized_end = 10078 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 9921 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 10049 + _INTERRUPTRESPONSE._serialized_start = 10080 + _INTERRUPTRESPONSE._serialized_end = 10171 + _SPARKCONNECTSERVICE._serialized_start = 10174 + _SPARKCONNECTSERVICE._serialized_end = 10722 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index 0533ad6a4837..35fc815ab6b6 100644 
--- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -2253,7 +2253,7 @@ class InterruptRequest(google.protobuf.message.Message): """Interrupt all running executions within the session with the provided session_id.""" INTERRUPT_TYPE_TAG: InterruptRequest._InterruptType.ValueType # 2 """Interrupt all running executions within the session with the provided operation_tag.""" - INTERRUPT_TYPE_ID: InterruptRequest._InterruptType.ValueType # 3 + INTERRUPT_TYPE_OPERATION_ID: InterruptRequest._InterruptType.ValueType # 3 """Interrupt the running execution within the session with the provided operation_id.""" class InterruptType(_InterruptType, metaclass=_InterruptTypeEnumTypeWrapper): ... @@ -2262,7 +2262,7 @@ class InterruptRequest(google.protobuf.message.Message): """Interrupt all running executions within the session with the provided session_id.""" INTERRUPT_TYPE_TAG: InterruptRequest.InterruptType.ValueType # 2 """Interrupt all running executions within the session with the provided operation_tag.""" - INTERRUPT_TYPE_ID: InterruptRequest.InterruptType.ValueType # 3 + INTERRUPT_TYPE_OPERATION_ID: InterruptRequest.InterruptType.ValueType # 3 """Interrupt the running execution within the session with the provided operation_id.""" SESSION_ID_FIELD_NUMBER: builtins.int @@ -2291,7 +2291,7 @@ class InterruptRequest(google.protobuf.message.Message): operation_tag: builtins.str """if interrupt_tag == INTERRUPT_TYPE_TAG, interrupt operation with this tag.""" operation_id: builtins.str - """if interrupt_tag == INTERRUPT_TYPE_ID, interrupt operation with this operation_id.""" + """if interrupt_tag == INTERRUPT_TYPE_OPERATION_ID, interrupt operation with this operation_id.""" def __init__( self, *, From a297d45eac191c2f450da81c0c90e95db09301b6 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Wed, 19 Jul 2023 20:57:09 +0200 Subject: [PATCH 09/21] scala client, without tests --- .../org/apache/spark/sql/SparkSession.scala | 66 +++++++++++++++++-- .../connect/client/SparkConnectClient.scala | 56 ++++++++++++++++ .../sql/connect/client/SparkResult.scala | 23 +++++++ .../main/protobuf/spark/connect/base.proto | 1 + .../spark/sql/connect/common/ProtoUtils.scala | 24 +++++++ .../execution/ExecuteResponseObserver.scala | 25 ++++++- .../sql/connect/service/ExecuteHolder.scala | 36 +++++----- 7 files changed, 204 insertions(+), 27 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala index fb9959c99428..b84880cfc912 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -613,16 +613,30 @@ class SparkSession private[sql] ( /** * Interrupt all operations of this session currently running on the connected server. * - * TODO/WIP: Currently it will interrupt the Spark Jobs running on the server, triggered from - * ExecutePlan requests. If an operation is not running a Spark Job, it becomes an noop and the - * operation will continue afterwards, possibly with more Spark Jobs. - * * @since 3.5.0 */ def interruptAll(): Unit = { client.interruptAll() } + /** + * Interrupt all operations of this session with the given operation tag. 
+ * + * @since 3.5.0 + */ + def interruptTag(tag: String): Unit = { + client.interruptTag(tag) + } + + /** + * Interrupt an operation of this session with the given operationId. + * + * @since 3.5.0 + */ + def interruptOperation(operationId: String): Unit = { + client.interruptOperation(operationId) + } + /** * Synonym for `close()`. * @@ -641,6 +655,50 @@ class SparkSession private[sql] ( allocator.close() SparkSession.onSessionClose(this) } + + /** + * Add a tag to be assigned to all the operations started by this thread in this session. + * + * @param tag + * The tag to be added. Cannot contain ',' (comma) character or be an empty string. + * + * @since 3.5.0 + */ + def addTag(tag: String): Unit = { + client.addTag(tag) + } + + /** + * Remove a tag previously added to be assigned to all the operations started by this thread in + * this session. Noop if such a tag was not added earlier. + * + * @param tag + * The tag to be removed. Cannot contain ',' (comma) character or be an empty string. + * + * @since 3.5.0 + */ + def removeTag(tag: String): Unit = { + client.removeTag(tag) + } + + /** + * Get the tags that are currently set to be assigned to all the operations started by this + * thread. + * + * @since 3.5.0 + */ + def getTags(): Set[String] = { + client.getTags() + } + + /** + * Clear the current thread's operation tags. + * + * @since 3.5.0 + */ + def clearTags(): Unit = { + client.clearTags() + } } // The minimal builder needed to create a spark session. diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala index b41ae5555bf3..75cc5eb7f49e 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala @@ -21,11 +21,15 @@ import java.net.URI import java.util.UUID import java.util.concurrent.Executor +import scala.collection.JavaConverters._ +import scala.collection.mutable + import com.google.protobuf.ByteString import io.grpc._ import org.apache.spark.connect.proto import org.apache.spark.connect.proto.UserContext +import org.apache.spark.sql.connect.common.ProtoUtils import org.apache.spark.sql.connect.common.config.ConnectCommon /** @@ -76,6 +80,7 @@ private[sql] class SparkConnectClient( .setUserContext(userContext) .setSessionId(sessionId) .setClientType(userAgent) + .addAllTags(tags.get.toSeq.asJava) .build() bstub.executePlan(request) } @@ -195,6 +200,57 @@ private[sql] class SparkConnectClient( bstub.interrupt(request) } + private[sql] def interruptTag(tag: String): proto.InterruptResponse = { + val builder = proto.InterruptRequest.newBuilder() + val request = builder + .setUserContext(userContext) + .setSessionId(sessionId) + .setClientType(userAgent) + .setInterruptType(proto.InterruptRequest.InterruptType.INTERRUPT_TYPE_TAG) + .setOperationTag(tag) + .build() + bstub.interrupt(request) + } + + private[sql] def interruptOperation(id: String): proto.InterruptResponse = { + val builder = proto.InterruptRequest.newBuilder() + val request = builder + .setUserContext(userContext) + .setSessionId(sessionId) + .setClientType(userAgent) + .setInterruptType(proto.InterruptRequest.InterruptType.INTERRUPT_TYPE_OPERATION_ID) + .setOperationId(id) + .build() + bstub.interrupt(request) + } + + private[this] val tags = new 
InheritableThreadLocal[mutable.Set[String]] { + override def childValue(parent: mutable.Set[String]): mutable.Set[String] = { + // Note: make a clone such that changes in the parent tags aren't reflected in + // the those of the children threads. + parent.clone() + } + override protected def initialValue(): mutable.Set[String] = new mutable.HashSet[String]() + } + + private[sql] def addTag(tag: String): Unit = { + ProtoUtils.throwIfInvalidTag(tag) + tags.get += tag + } + + private[sql] def removeTag(tag: String): Unit = { + ProtoUtils.throwIfInvalidTag(tag) + tags.get.remove(tag) + } + + private[sql] def getTags(): Set[String] = { + tags.get.toSet + } + + private[sql] def clearTags(): Unit = { + tags.get.clear() + } + def copy(): SparkConnectClient = configuration.toSparkConnectClient /** diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala index 1cdc2035de60..fca7e7a635dd 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala @@ -40,6 +40,7 @@ private[sql] class SparkResult[T]( extends AutoCloseable with Cleanable { self => + private[this] var opId: String = null private[this] var numRecords: Int = 0 private[this] var structType: StructType = _ private[this] var arrowSchema: pojo.Schema = _ @@ -79,6 +80,19 @@ private[sql] class SparkResult[T]( var stop = false while (!stop && responses.hasNext) { val response = responses.next() + + // Save and validate operationId + if (opId == null) { + opId = response.getOperationId + } + if (opId != response.getOperationId) { + // backwards compatibility: + // response from an old server without operationId field would have getOperationId == "". + throw new IllegalStateException( + "Received response with wrong operationId. " + + s"Expected '$opId' but received '${response.getOperationId}'.") + } + if (response.hasSchema) { // The original schema should arrive before ArrowBatches. structType = @@ -148,6 +162,15 @@ private[sql] class SparkResult[T]( structType } + /** + * @return + * the operationId of the result. + */ + def operationId: String = { + processResponses(stopOnFirstNonEmptyResponse = true) + opId + } + /** * Create an Array with the contents of the result. */ diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index 5449eb56d721..d935ae65328d 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -307,6 +307,7 @@ message ExecutePlanRequest { } // Tags to tag the given execution with. + // Tags cannot contain ',' character and cannot be empty strings. // Used by Interrupt with interrupt.tag. 
repeated string tags = 7; } diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala index e0c7d267c604..f27278c60001 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala @@ -81,4 +81,28 @@ private[connect] object ProtoUtils { private def createString(prefix: String, size: Int): String = { s"$prefix[truncated(size=${format.format(size)})]" } + + // Because Spark Connect operation tags are also set as SparkContext Job tags, they cannot contain + // SparkContext.SPARK_JOB_TAGS_SEP + private var SPARK_JOB_TAGS_SEP = ',' // SparkContext.SPARK_JOB_TAGS_SEP + + /** + * Validate if a tag for ExecutePlanRequest.tags is valid. Throw IllegalArgumentException if + * not. + */ + def throwIfInvalidTag(tag: String): Unit = { + // Same format rules apply to Spark Connect execution tags as to SparkContext job tags, + // because the Spark Connect job tag is also used as part of SparkContext job tag. + // See SparkContext.throwIfInvalidTag and ExecuteHolder.tagToSparkJobTag + if (tag == null) { + throw new IllegalArgumentException("Spark Connect execution tag cannot be null.") + } + if (tag.contains(SPARK_JOB_TAGS_SEP)) { + throw new IllegalArgumentException( + s"Spark Connect execution tag cannot contain '$SPARK_JOB_TAGS_SEP'.") + } + if (tag.isEmpty) { + throw new IllegalArgumentException("Spark Connect execution tag cannot be an empty string.") + } + } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala index 5aecbdfce163..93da791b88c2 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala @@ -21,7 +21,9 @@ import scala.collection.mutable import io.grpc.stub.StreamObserver +import org.apache.spark.connect.proto import org.apache.spark.internal.Logging +import org.apache.spark.sql.connect.service.ExecuteHolder /** * This StreamObserver is running on the execution thread. Execution pushes responses to it, it @@ -40,7 +42,9 @@ import org.apache.spark.internal.Logging * @see * attachConsumer */ -private[connect] class ExecuteResponseObserver[T]() extends StreamObserver[T] with Logging { +private[connect] class ExecuteResponseObserver[T](val executeHolder: ExecuteHolder) + extends StreamObserver[T] + with Logging { /** * Cached responses produced by the execution. Map from response index -> response. 
Response @@ -77,7 +81,9 @@ private[connect] class ExecuteResponseObserver[T]() extends StreamObserver[T] wi throw new IllegalStateException("Stream onNext can't be called after stream completed") } lastProducedIndex += 1 - responses += ((lastProducedIndex, CachedStreamResponse[T](r, lastProducedIndex))) + val processedResponse = setCommonResponseFields(r) + responses += + ((lastProducedIndex, CachedStreamResponse[T](processedResponse, lastProducedIndex))) logDebug(s"Saved response with index=$lastProducedIndex") notifyAll() } @@ -158,4 +164,19 @@ private[connect] class ExecuteResponseObserver[T]() extends StreamObserver[T] wi i -= 1 } } + + /** + * Make sure that response fields that common should be set in every response are populated. + */ + private def setCommonResponseFields(response: T): T = { + response match { + case executePlanResponse: proto.ExecutePlanResponse => + executePlanResponse + .toBuilder() + .setSessionId(executeHolder.sessionHolder.sessionId) + .setOperationId(executeHolder.operationId) + .build() + .asInstanceOf[T] + } + } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index c90f2a570b56..c0f4f8747ce5 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.connect.service import scala.collection.JavaConverters._ -import org.apache.spark.SparkContext import org.apache.spark.connect.proto import org.apache.spark.internal.Logging +import org.apache.spark.sql.connect.common.ProtoUtils import org.apache.spark.sql.connect.execution.{ExecuteGrpcResponseSender, ExecuteResponseObserver, ExecuteThreadRunner} import org.apache.spark.util.SystemClock @@ -40,15 +40,20 @@ private[connect] class ExecuteHolder( s"Session_${sessionHolder.sessionId}_" + s"Request_${operationId}" - val userDefinedTags: Seq[String] = request.getTagsList().asScala.toSeq.map { tag => - throwIfInvalidTag(tag) - tag - } + val userDefinedTags: Set[String] = request + .getTagsList() + .asScala + .toSeq + .map { tag => + ProtoUtils.throwIfInvalidTag(tag) + tag + } + .toSet val session = sessionHolder.session val responseObserver: ExecuteResponseObserver[proto.ExecutePlanResponse] = - new ExecuteResponseObserver[proto.ExecutePlanResponse]() + new ExecuteResponseObserver[proto.ExecutePlanResponse](this) val eventsManager: ExecuteEventsManager = ExecuteEventsManager(this, new SystemClock()) @@ -98,23 +103,12 @@ private[connect] class ExecuteHolder( runner.interrupt() } + /** + * Spark Connect tags are also added as SparkContext job tags, but to make the tag unique, they + * need to be combined with userId and sessionId. + */ def tagToSparkJobTag(tag: String): String = { "SparkConnectUserDefinedTag_" + s"User_${sessionHolder.userId}_Session_${sessionHolder.sessionId}" } - - private def throwIfInvalidTag(tag: String) = { - // Same format rules apply to Spark Connect execution tags as to SparkContext job tags. - // see SparkContext.throwIfInvalidTag. 
-    if (tag == null) {
-      throw new IllegalArgumentException("Spark Connect execution tag cannot be null.")
-    }
-    if (tag.contains(SparkContext.SPARK_JOB_TAGS_SEP)) {
-      throw new IllegalArgumentException(
-        s"Spark Connect execution tag cannot contain '${SparkContext.SPARK_JOB_TAGS_SEP}'.")
-    }
-    if (tag.isEmpty) {
-      throw new IllegalArgumentException("Spark Connect execution tag cannot be an empty string.")
-    }
-  }
 }

From e6bdcaba44683faa64e760065f0622e240f57e2f Mon Sep 17 00:00:00 2001
From: Juliusz Sompolski
Date: Wed, 19 Jul 2023 21:21:58 +0200
Subject: [PATCH 10/21] check returned operations id in interruptAll

---
 .../org/apache/spark/sql/SparkSession.scala   | 24 ++++++++++++++-----
 .../connect/client/SparkConnectClient.scala   |  2 +-
 .../spark/sql/SparkSessionE2ESuite.scala      | 12 ++++++++--
 .../execution/ExecuteThreadRunner.scala       |  7 ++++++
 4 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
index b84880cfc912..d9dd6a610841 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -613,28 +613,40 @@ class SparkSession private[sql] (
   /**
    * Interrupt all operations of this session currently running on the connected server.
    *
+   * @return
+   *   sequence of operationIds of interrupted operations. Note: there is still a possibility of
+   *   operation finishing just as it is interrupted.
+   *
    * @since 3.5.0
    */
-  def interruptAll(): Unit = {
-    client.interruptAll()
+  def interruptAll(): Seq[String] = {
+    client.interruptAll().getInterruptedIdsList.asScala
   }
 
   /**
    * Interrupt all operations of this session with the given operation tag.
    *
+   * @return
+   *   sequence of operationIds of interrupted operations. Note: there is still a possibility of
+   *   operation finishing just as it is interrupted.
+   *
    * @since 3.5.0
    */
-  def interruptTag(tag: String): Unit = {
-    client.interruptTag(tag)
+  def interruptTag(tag: String): Seq[String] = {
+    client.interruptTag(tag).getInterruptedIdsList.asScala
   }
 
   /**
    * Interrupt an operation of this session with the given operationId.
    *
+   * @return
+   *   sequence of operationIds of interrupted operations. Note: there is still a possibility of
+   *   operation finishing just as it is interrupted.
+   *
    * @since 3.5.0
    */
-  def interruptOperation(operationId: String): Unit = {
-    client.interruptOperation(operationId)
+  def interruptOperation(operationId: String): Seq[String] = {
+    client.interruptOperation(operationId).getInterruptedIdsList.asScala
   }
 
   /**
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
index 75cc5eb7f49e..a330322aed7e 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
@@ -227,7 +227,7 @@ private[sql] class SparkConnectClient(
   private[this] val tags = new InheritableThreadLocal[mutable.Set[String]] {
     override def childValue(parent: mutable.Set[String]): mutable.Set[String] = {
       // Note: make a clone such that changes in the parent tags aren't reflected in
-      // the those of the children threads.
+ // those of the children threads. parent.clone() } override protected def initialValue(): mutable.Set[String] = new mutable.HashSet[String]() diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala index 70eeb6c2c41d..44a701dbcb44 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future} import scala.concurrent.duration._ +import scala.mutable import scala.util.{Failure, Success} import org.scalatest.concurrent.Eventually._ @@ -64,13 +65,16 @@ class SparkSessionE2ESuite extends RemoteSparkSession { } // 20 seconds is < 30 seconds the queries should be running, // because it should be interrupted sooner + val interrupted = mutable.ListBuffer[String]() eventually(timeout(20.seconds), interval(1.seconds)) { // keep interrupting every second, until both queries get interrupted. - spark.interruptAll() + val ids = spark.interruptAll() + interrupted ++= ids assert(error.isEmpty, s"Error not empty: $error") assert(q1Interrupted) assert(q2Interrupted) } + assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") } test("interrupt all - foreground queries, background interrupt") { @@ -79,9 +83,12 @@ class SparkSessionE2ESuite extends RemoteSparkSession { implicit val ec: ExecutionContextExecutor = ExecutionContext.global @volatile var finished = false + val interrupted = mutable.ListBuffer[String]() + val interruptor = Future { eventually(timeout(20.seconds), interval(1.seconds)) { - spark.interruptAll() + val ids = spark.interruptAll() + interrupted ++= ids assert(finished) } finished @@ -96,5 +103,6 @@ class SparkSessionE2ESuite extends RemoteSparkSession { assert(e2.getMessage.contains("OPERATION_CANCELED"), s"Unexpected exception: $e2") finished = true assert(ThreadUtils.awaitResult(interruptor, 10.seconds)) + assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala index 6c2ffa465474..e1d922721424 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala @@ -85,6 +85,9 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends } } finally { executeHolder.sessionHolder.session.sparkContext.removeJobTag(executeHolder.jobTag) + executeHolder.userDefinedTags.foreach { tag => + session.sparkContext.removeJobTag(executeHolder.tagToSparkJobTag(tag)) + } } } catch { ErrorUtils.handleError( @@ -113,6 +116,10 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends // Set tag for query cancellation session.sparkContext.addJobTag(executeHolder.jobTag) + // Also set all user defined tags as Spark Job tags. 
+ executeHolder.userDefinedTags.foreach { tag => + session.sparkContext.addJobTag(executeHolder.tagToSparkJobTag(tag)) + } session.sparkContext.setJobDescription( s"Spark Connect - ${StringUtils.abbreviate(debugString, 128)}") session.sparkContext.setInterruptOnCancel(true) From 32126e7772bd87efbabb51584bf4abdcada16e96 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Wed, 19 Jul 2023 22:34:11 +0200 Subject: [PATCH 11/21] interruptTag test --- .../spark/sql/SparkSessionE2ESuite.scala | 114 +++++++++++++++++- .../execution/ExecuteThreadRunner.scala | 3 +- .../sql/connect/service/ExecuteHolder.scala | 2 +- 3 files changed, 114 insertions(+), 5 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala index 44a701dbcb44..52700728bf3f 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala @@ -16,9 +16,9 @@ */ package org.apache.spark.sql +import scala.collection.mutable import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future} import scala.concurrent.duration._ -import scala.mutable import scala.util.{Failure, Success} import org.scalatest.concurrent.Eventually._ @@ -74,7 +74,7 @@ class SparkSessionE2ESuite extends RemoteSparkSession { assert(q1Interrupted) assert(q2Interrupted) } - assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") + assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") } test("interrupt all - foreground queries, background interrupt") { @@ -103,6 +103,114 @@ class SparkSessionE2ESuite extends RemoteSparkSession { assert(e2.getMessage.contains("OPERATION_CANCELED"), s"Unexpected exception: $e2") finished = true assert(ThreadUtils.awaitResult(interruptor, 10.seconds)) - assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") + assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") + } + + test("interrupt tag") { + val session = spark + import session.implicits._ + + // global ExecutionContext has only 2 threads in Apache Spark CI + // create own thread pool for four Futures used in this test + val numThreads = 4 + val fpool = ThreadUtils.newForkJoinPool("job-tags-test-thread-pool", numThreads) + val executionContext = ExecutionContext.fromExecutorService(fpool) + + val q1 = Future { + assert(spark.getTags() == Set()) + spark.addTag("two") + assert(spark.getTags() == Set("two")) + spark.clearTags() // check that clearing all tags works + assert(spark.getTags() == Set()) + spark.addTag("one") + assert(spark.getTags() == Set("one")) + try { + spark.range(10).map(n => { + Thread.sleep(30000); n + }).collect() + } finally { + spark.clearTags() // clear for the case of thread reuse by another Future + } + }(executionContext) + val q2 = Future { + assert(spark.getTags() == Set()) + spark.addTag("one") + spark.addTag("two") + spark.addTag("one") + spark.addTag("two") // duplicates shouldn't matter + try { + spark.range(10).map(n => { + Thread.sleep(30000); n + }).collect() + } finally { + spark.clearTags() // clear for the case of thread reuse by another Future + } + }(executionContext) + val q3 = Future { + assert(spark.getTags() == Set()) + spark.addTag("foo") + spark.removeTag("foo") + assert(spark.getTags() == Set()) // check that remove works 
removing the last tag + spark.addTag("two") + assert(spark.getTags() == Set("two")) + try { + spark.range(10).map(n => { + Thread.sleep(30000); n + }).collect() + } finally { + spark.clearTags() // clear for the case of thread reuse by another Future + } + }(executionContext) + val q4 = Future { + assert(spark.getTags() == Set()) + spark.addTag("one") + spark.addTag("two") + spark.addTag("two") + assert(spark.getTags() == Set("one", "two")) + spark.removeTag("two") // check that remove works, despite duplicate add + assert(spark.getTags() == Set("one")) + try { + spark.range(10).map(n => { + Thread.sleep(30000); n + }).collect() + } finally { + spark.clearTags() // clear for the case of thread reuse by another Future + } + }(executionContext) + val interrupted = mutable.ListBuffer[String]() + + // q2 and q3 should be cancelled + interrupted.clear() + eventually(timeout(20.seconds), interval(1.seconds)) { + val ids = spark.interruptTag("two") + interrupted ++= ids + assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") + } + val e2 = intercept[SparkException] { + ThreadUtils.awaitResult(q2, 1.minute) + }.getCause + assert(e2.getMessage contains "OPERATION_CANCELED") + val e3 = intercept[SparkException] { + ThreadUtils.awaitResult(q3, 1.minute) + }.getCause + assert(e3.getMessage contains "OPERATION_CANCELED") + assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") + + // q1 and q4 should be cancelled + interrupted.clear() + eventually(timeout(20.seconds), interval(1.seconds)) { + val ids = spark.interruptTag("one") + interrupted ++= ids + assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") + } + val e1 = intercept[SparkException] { + ThreadUtils.awaitResult(q1, 1.minute) + }.getCause + assert(e1.getMessage contains "OPERATION_CANCELED") + val e4 = intercept[SparkException] { + ThreadUtils.awaitResult(q4, 1.minute) + }.getCause + assert(e4.getMessage contains "OPERATION_CANCELED") + assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala index e1d922721424..041e00725cf1 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala @@ -86,7 +86,8 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends } finally { executeHolder.sessionHolder.session.sparkContext.removeJobTag(executeHolder.jobTag) executeHolder.userDefinedTags.foreach { tag => - session.sparkContext.removeJobTag(executeHolder.tagToSparkJobTag(tag)) + executeHolder.sessionHolder.session.sparkContext.removeJobTag( + executeHolder.tagToSparkJobTag(tag)) } } } catch { diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index c0f4f8747ce5..f3eef98e2e5a 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -108,7 +108,7 @@ private[connect] class 
ExecuteHolder( * need to be combined with userId and sessionId. */ def tagToSparkJobTag(tag: String): String = { - "SparkConnectUserDefinedTag_" + + "SparkConnect_Tag_" + s"User_${sessionHolder.userId}_Session_${sessionHolder.sessionId}" } } From 2e32219034ceca736e8d9baa4dd9028689923dc9 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Wed, 19 Jul 2023 22:39:13 +0200 Subject: [PATCH 12/21] add to SparkListenerConnectOperationStarted --- .../spark/sql/connect/service/ExecuteEventsManager.scala | 8 +++++++- .../sql/connect/service/ExecuteEventsManagerSuite.scala | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala index 0af54f034a25..130700fdf671 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala @@ -59,6 +59,8 @@ case class ExecuteEventsManager(executeHolder: ExecuteHolder, clock: Clock) { private def jobTag = executeHolder.jobTag + private def userDefinedTags = executeHolder.userDefinedTags + private def listenerBus = sessionHolder.session.sparkContext.listenerBus private def sessionHolder = executeHolder.sessionHolder @@ -119,7 +121,8 @@ case class ExecuteEventsManager(executeHolder: ExecuteHolder, clock: Clock) { Utils.redact( sessionHolder.session.sessionState.conf.stringRedactionPattern, ProtoUtils.abbreviate(plan, ExecuteEventsManager.MAX_STATEMENT_TEXT_SIZE).toString), - Some(request))) + Some(request), + userDefinedTags)) } /** @@ -270,6 +273,8 @@ case class ExecuteEventsManager(executeHolder: ExecuteHolder, clock: Clock) { * The connect request plan converted to text. * @param planRequest: * The Connect request. None if the operation is not of type @link proto.ExecutePlanRequest + * @param userDefinedTags: + * Extra tags set by the user (via SparkSession.addTag). * @param extraTags: * Additional metadata during the request. 
*/ @@ -282,6 +287,7 @@ case class SparkListenerConnectOperationStarted( userName: String, statementText: String, planRequest: Option[proto.ExecutePlanRequest], + userDefinedTags: Set[String], extraTags: Map[String, String] = Map.empty) extends SparkListenerEvent diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/ExecuteEventsManagerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/ExecuteEventsManagerSuite.scala index 365b17632a74..27c57e0d759f 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/ExecuteEventsManagerSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/ExecuteEventsManagerSuite.scala @@ -64,6 +64,7 @@ class ExecuteEventsManagerSuite DEFAULT_USER_NAME, DEFAULT_TEXT, Some(events.executeHolder.request), + Set.empty, Map.empty)) } From 608c79226362cddc24646ec10dd928ca3635ff0c Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Wed, 19 Jul 2023 23:18:50 +0200 Subject: [PATCH 13/21] interrupt operation test --- .../sql/connect/client/SparkResult.scala | 6 +++- .../spark/sql/SparkSessionE2ESuite.scala | 34 ++++++++++++++----- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala index fca7e7a635dd..f2380d830bbe 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala @@ -73,6 +73,7 @@ private[sql] class SparkResult[T]( } private def processResponses( + stopOnOperationId: Boolean = false, stopOnSchema: Boolean = false, stopOnArrowSchema: Boolean = false, stopOnFirstNonEmptyResponse: Boolean = false): Boolean = { @@ -92,6 +93,7 @@ private[sql] class SparkResult[T]( "Received response with wrong operationId. " + s"Expected '$opId' but received '${response.getOperationId}'.") } + stop |= stopOnOperationId if (response.hasSchema) { // The original schema should arrive before ArrowBatches. @@ -167,7 +169,9 @@ private[sql] class SparkResult[T]( * the operationId of the result. 
*/ def operationId: String = { - processResponses(stopOnFirstNonEmptyResponse = true) + if (opId == null) { + processResponses(stopOnOperationId = true) + } opId } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala index 52700728bf3f..98fa3d6b81f7 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala @@ -188,12 +188,12 @@ class SparkSessionE2ESuite extends RemoteSparkSession { } val e2 = intercept[SparkException] { ThreadUtils.awaitResult(q2, 1.minute) - }.getCause - assert(e2.getMessage contains "OPERATION_CANCELED") + } + assert(e2.getCause.getMessage contains "OPERATION_CANCELED") val e3 = intercept[SparkException] { ThreadUtils.awaitResult(q3, 1.minute) - }.getCause - assert(e3.getMessage contains "OPERATION_CANCELED") + } + assert(e3.getCause.getMessage contains "OPERATION_CANCELED") assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") // q1 and q4 should be cancelled @@ -205,12 +205,30 @@ class SparkSessionE2ESuite extends RemoteSparkSession { } val e1 = intercept[SparkException] { ThreadUtils.awaitResult(q1, 1.minute) - }.getCause - assert(e1.getMessage contains "OPERATION_CANCELED") + } + assert(e1.getCause.getMessage contains "OPERATION_CANCELED") val e4 = intercept[SparkException] { ThreadUtils.awaitResult(q4, 1.minute) - }.getCause - assert(e4.getMessage contains "OPERATION_CANCELED") + } + assert(e4.getCause.getMessage contains "OPERATION_CANCELED") assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") } + + test("interrupt operation") { + val session = spark + import session.implicits._ + + val result = spark.range(10).map(n => { + Thread.sleep(5000); n + }).collectResult() + // cancel + val operationId = result.operationId + val canceledId = spark.interruptOperation(operationId) + assert(canceledId == Seq(operationId)) + // and check that it got canceled + val e = intercept[SparkException] { + result.toArray + } + assert(e.getMessage contains "OPERATION_CANCELED") + } } From 5112ccfc48c7a2805773f0d15facb1e2f25c65a3 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Wed, 19 Jul 2023 23:22:28 +0200 Subject: [PATCH 14/21] lint --- .../spark/sql/SparkSessionE2ESuite.scala | 53 +++++++++++++------ .../execution/ExecuteThreadRunner.scala | 4 +- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala index 98fa3d6b81f7..61e196abffe0 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala @@ -125,9 +125,12 @@ class SparkSessionE2ESuite extends RemoteSparkSession { spark.addTag("one") assert(spark.getTags() == Set("one")) try { - spark.range(10).map(n => { - Thread.sleep(30000); n - }).collect() + spark + .range(10) + .map(n => { + Thread.sleep(30000); n + }) + .collect() } finally { spark.clearTags() // clear for the case of thread reuse by another Future } @@ -139,9 +142,12 @@ class SparkSessionE2ESuite extends RemoteSparkSession { spark.addTag("one") spark.addTag("two") // 
duplicates shouldn't matter try { - spark.range(10).map(n => { - Thread.sleep(30000); n - }).collect() + spark + .range(10) + .map(n => { + Thread.sleep(30000); n + }) + .collect() } finally { spark.clearTags() // clear for the case of thread reuse by another Future } @@ -154,9 +160,12 @@ class SparkSessionE2ESuite extends RemoteSparkSession { spark.addTag("two") assert(spark.getTags() == Set("two")) try { - spark.range(10).map(n => { - Thread.sleep(30000); n - }).collect() + spark + .range(10) + .map(n => { + Thread.sleep(30000); n + }) + .collect() } finally { spark.clearTags() // clear for the case of thread reuse by another Future } @@ -170,9 +179,12 @@ class SparkSessionE2ESuite extends RemoteSparkSession { spark.removeTag("two") // check that remove works, despite duplicate add assert(spark.getTags() == Set("one")) try { - spark.range(10).map(n => { - Thread.sleep(30000); n - }).collect() + spark + .range(10) + .map(n => { + Thread.sleep(30000); n + }) + .collect() } finally { spark.clearTags() // clear for the case of thread reuse by another Future } @@ -184,7 +196,9 @@ class SparkSessionE2ESuite extends RemoteSparkSession { eventually(timeout(20.seconds), interval(1.seconds)) { val ids = spark.interruptTag("two") interrupted ++= ids - assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") + assert( + interrupted.distinct.length == 2, + s"Interrupted operations: ${interrupted.distinct}.") } val e2 = intercept[SparkException] { ThreadUtils.awaitResult(q2, 1.minute) @@ -201,7 +215,9 @@ class SparkSessionE2ESuite extends RemoteSparkSession { eventually(timeout(20.seconds), interval(1.seconds)) { val ids = spark.interruptTag("one") interrupted ++= ids - assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.") + assert( + interrupted.distinct.length == 2, + s"Interrupted operations: ${interrupted.distinct}.") } val e1 = intercept[SparkException] { ThreadUtils.awaitResult(q1, 1.minute) @@ -218,9 +234,12 @@ class SparkSessionE2ESuite extends RemoteSparkSession { val session = spark import session.implicits._ - val result = spark.range(10).map(n => { - Thread.sleep(5000); n - }).collectResult() + val result = spark + .range(10) + .map(n => { + Thread.sleep(5000); n + }) + .collectResult() // cancel val operationId = result.operationId val canceledId = spark.interruptOperation(operationId) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala index 041e00725cf1..57c64c24ee50 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala @@ -86,8 +86,8 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends } finally { executeHolder.sessionHolder.session.sparkContext.removeJobTag(executeHolder.jobTag) executeHolder.userDefinedTags.foreach { tag => - executeHolder.sessionHolder.session.sparkContext.removeJobTag( - executeHolder.tagToSparkJobTag(tag)) + executeHolder.sessionHolder.session.sparkContext + .removeJobTag(executeHolder.tagToSparkJobTag(tag)) } } } catch { From 3655ebf12270acf6682fca2a4c080f95c714a6fe Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Wed, 19 Jul 2023 23:32:31 +0200 Subject: [PATCH 15/21] self review --- 
.../spark/sql/connect/client/SparkConnectClient.scala | 2 ++ .../org/apache/spark/sql/connect/common/ProtoUtils.scala | 6 +++--- .../sql/connect/execution/ExecuteResponseObserver.scala | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala index a330322aed7e..d03d27a6f53d 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala @@ -234,11 +234,13 @@ private[sql] class SparkConnectClient( } private[sql] def addTag(tag: String): Unit = { + // validation is also done server side, but this will give error earlier. ProtoUtils.throwIfInvalidTag(tag) tags.get += tag } private[sql] def removeTag(tag: String): Unit = { + // validation is also done server side, but this will give error earlier. ProtoUtils.throwIfInvalidTag(tag) tags.get.remove(tag) } diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala index f27278c60001..e2934b567449 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala @@ -95,14 +95,14 @@ private[connect] object ProtoUtils { // because the Spark Connect job tag is also used as part of SparkContext job tag. // See SparkContext.throwIfInvalidTag and ExecuteHolder.tagToSparkJobTag if (tag == null) { - throw new IllegalArgumentException("Spark Connect execution tag cannot be null.") + throw new IllegalArgumentException("Spark Connect tag cannot be null.") } if (tag.contains(SPARK_JOB_TAGS_SEP)) { throw new IllegalArgumentException( - s"Spark Connect execution tag cannot contain '$SPARK_JOB_TAGS_SEP'.") + s"Spark Connect tag cannot contain '$SPARK_JOB_TAGS_SEP'.") } if (tag.isEmpty) { - throw new IllegalArgumentException("Spark Connect execution tag cannot be an empty string.") + throw new IllegalArgumentException("Spark Connect tag cannot be an empty string.") } } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala index 93da791b88c2..ae89c150a68f 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala @@ -166,7 +166,7 @@ private[connect] class ExecuteResponseObserver[T](val executeHolder: ExecuteHold } /** - * Make sure that response fields that common should be set in every response are populated. + * Populate response fields that are common and should be set in every response. 
*/ private def setCommonResponseFields(response: T): T = { response match { From 6024e24873b4be40754cd60d0d7099ce5cf6e0a7 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Thu, 20 Jul 2023 00:42:15 +0200 Subject: [PATCH 16/21] null->_ --- .../scala/org/apache/spark/sql/connect/client/SparkResult.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala index f2380d830bbe..eed8bd3f37d9 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala @@ -40,7 +40,7 @@ private[sql] class SparkResult[T]( extends AutoCloseable with Cleanable { self => - private[this] var opId: String = null + private[this] var opId: String = _ private[this] var numRecords: Int = 0 private[this] var structType: StructType = _ private[this] var arrowSchema: pojo.Schema = _ From 63f7a056596562dc769b3183b7e7053f1ddf2966 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Thu, 20 Jul 2023 00:56:57 +0200 Subject: [PATCH 17/21] rename userDefinedTags to sparkSessionTags --- .../sql/connect/execution/ExecuteThreadRunner.scala | 4 ++-- .../sql/connect/service/ExecuteEventsManager.scala | 8 ++++---- .../spark/sql/connect/service/ExecuteHolder.scala | 10 +++++++++- .../spark/sql/connect/service/SessionHolder.scala | 2 +- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala index 57c64c24ee50..d26f132371a6 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala @@ -85,7 +85,7 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends } } finally { executeHolder.sessionHolder.session.sparkContext.removeJobTag(executeHolder.jobTag) - executeHolder.userDefinedTags.foreach { tag => + executeHolder.sparkSessionTags.foreach { tag => executeHolder.sessionHolder.session.sparkContext .removeJobTag(executeHolder.tagToSparkJobTag(tag)) } @@ -118,7 +118,7 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends // Set tag for query cancellation session.sparkContext.addJobTag(executeHolder.jobTag) // Also set all user defined tags as Spark Job tags. 
-    executeHolder.userDefinedTags.foreach { tag =>
+    executeHolder.sparkSessionTags.foreach { tag =>
       session.sparkContext.addJobTag(executeHolder.tagToSparkJobTag(tag))
     }
     session.sparkContext.setJobDescription(
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala
index 130700fdf671..5e831aaa98f2 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala
@@ -59,7 +59,7 @@ case class ExecuteEventsManager(executeHolder: ExecuteHolder, clock: Clock) {
 
   private def jobTag = executeHolder.jobTag
 
-  private def userDefinedTags = executeHolder.userDefinedTags
+  private def sparkSessionTags = executeHolder.sparkSessionTags
 
   private def listenerBus = sessionHolder.session.sparkContext.listenerBus
 
@@ -122,7 +122,7 @@ case class ExecuteEventsManager(executeHolder: ExecuteHolder, clock: Clock) {
         sessionHolder.session.sessionState.conf.stringRedactionPattern,
         ProtoUtils.abbreviate(plan, ExecuteEventsManager.MAX_STATEMENT_TEXT_SIZE).toString),
       Some(request),
-      userDefinedTags))
+      sparkSessionTags))
   }
 
   /**
@@ -273,7 +273,7 @@ case class ExecuteEventsManager(executeHolder: ExecuteHolder, clock: Clock) {
    * The connect request plan converted to text.
    * @param planRequest:
    *   The Connect request. None if the operation is not of type @link proto.ExecutePlanRequest
-   * @param userDefinedTags:
+   * @param sparkSessionTags:
    *   Extra tags set by the user (via SparkSession.addTag).
    * @param extraTags:
    *   Additional metadata during the request.
@@ -287,7 +287,7 @@ case class SparkListenerConnectOperationStarted(
     userName: String,
     statementText: String,
     planRequest: Option[proto.ExecutePlanRequest],
-    userDefinedTags: Set[String],
+    sparkSessionTags: Set[String],
     extraTags: Map[String, String] = Map.empty)
   extends SparkListenerEvent
 
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala
index f3eef98e2e5a..6e405262683c 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala
@@ -34,13 +34,21 @@ private[connect] class ExecuteHolder(
     val sessionHolder: SessionHolder)
     extends Logging {
 
+  /**
+   * Tag that is set for this execution on SparkContext, via SparkContext.addJobTag. Used
+   * (internally) for cancellation of the Spark Jobs run by this execution.
+   */
   val jobTag =
     s"SparkConnect_Execute_" +
       s"User_${sessionHolder.userId}_" +
       s"Session_${sessionHolder.sessionId}_" +
       s"Request_${operationId}"
 
-  val userDefinedTags: Set[String] = request
+  /**
+   * Tags set by Spark Connect client users via SparkSession.addTag. Used to identify and group
+   * executions, and for user cancellation using SparkSession.interruptTag.
+   */
+  val sparkSessionTags: Set[String] = request
+    .getTagsList()
+    .asScala
+    .toSeq
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala
index 3b46adab954a..bfdffe69ea16 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala
@@ -108,7 +108,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio
   private[connect] def interruptTag(tag: String): Seq[String] = {
     val interruptedIds = new mutable.ArrayBuffer[String]()
     executions.asScala.values.foreach { execute =>
-      if (execute.userDefinedTags.contains(tag)) {
+      if (execute.sparkSessionTags.contains(tag)) {
         interruptedIds += execute.operationId
         execute.interrupt()
       }

From 652091ff7d96266997b8544a3f18d6f162752aad Mon Sep 17 00:00:00 2001
From: Juliusz Sompolski
Date: Thu, 20 Jul 2023 10:23:02 +0200
Subject: [PATCH 18/21] fix mima

---
 .../client/CheckConnectJvmClientCompatibility.scala | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala
index e7f01d6140de..deb2ff631fdf 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala
@@ -365,6 +365,18 @@ object CheckConnectJvmClientCompatibility {
       // public
       ProblemFilters.exclude[DirectMissingMethodProblem](
         "org.apache.spark.sql.SparkSession.interruptAll"),
+      ProblemFilters.exclude[DirectMissingMethodProblem](
+        "org.apache.spark.sql.SparkSession.interruptTag"),
+      ProblemFilters.exclude[DirectMissingMethodProblem](
+        "org.apache.spark.sql.SparkSession.interruptOperation"),
+      ProblemFilters.exclude[DirectMissingMethodProblem](
+        "org.apache.spark.sql.SparkSession.addTag"),
+      ProblemFilters.exclude[DirectMissingMethodProblem](
+        "org.apache.spark.sql.SparkSession.removeTag"),
+      ProblemFilters.exclude[DirectMissingMethodProblem](
+        "org.apache.spark.sql.SparkSession.getTags"),
+      ProblemFilters.exclude[DirectMissingMethodProblem](
+        "org.apache.spark.sql.SparkSession.clearTags"),
       // SparkSession#Builder
       ProblemFilters.exclude[DirectMissingMethodProblem](
         "org.apache.spark.sql.SparkSession#Builder.remote"),

From 81986540ebc36596038dcaa5ec06fbba0543ead7 Mon Sep 17 00:00:00 2001
From: Juliusz Sompolski
Date: Thu, 20 Jul 2023 10:25:33 +0200
Subject: [PATCH 19/21] fix scala 2.13

---
 .../src/main/scala/org/apache/spark/sql/SparkSession.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
index d9dd6a610841..b37e3884038b 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -620,7 +620,7 @@ class SparkSession private[sql] (
    * @since 3.5.0
    */
   def interruptAll(): Seq[String] = {
-    client.interruptAll().getInterruptedIdsList.asScala
+    client.interruptAll().getInterruptedIdsList.asScala.toSeq
   }
 
   /**
@@ -633,7 +633,7 @@ class SparkSession private[sql] (
    * @since 3.5.0
    */
   def interruptTag(tag: String): Seq[String] = {
-    client.interruptTag(tag).getInterruptedIdsList.asScala
+    client.interruptTag(tag).getInterruptedIdsList.asScala.toSeq
   }
 
   /**
@@ -646,7 +646,7 @@ class SparkSession private[sql] (
    * @since 3.5.0
    */
   def interruptOperation(operationId: String): Seq[String] = {
-    client.interruptOperation(operationId).getInterruptedIdsList.asScala
+    client.interruptOperation(operationId).getInterruptedIdsList.asScala.toSeq
   }
 
   /**

From e4047e3cfa490447484494173c4809bb07a1a079 Mon Sep 17 00:00:00 2001
From: Juliusz Sompolski
Date: Thu, 20 Jul 2023 11:52:21 +0200
Subject: [PATCH 20/21] regenerate python protos

---
 python/pyspark/sql/connect/proto/base_pb2.pyi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi
index 35fc815ab6b6..651438ea4385 100644
--- a/python/pyspark/sql/connect/proto/base_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/base_pb2.pyi
@@ -1078,6 +1078,7 @@ class ExecutePlanRequest(google.protobuf.message.Message):
         self,
     ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
         """Tags to tag the given execution with.
+        Tags cannot contain ',' character and cannot be empty strings.
         Used by Interrupt with interrupt.tag.
         """
     def __init__(

From b8a4ac7198226d0211300251f2719b4fb4b21d51 Mon Sep 17 00:00:00 2001
From: Juliusz Sompolski
Date: Thu, 20 Jul 2023 15:09:03 +0200
Subject: [PATCH 21/21] make interruptedId be returned only once to deflake test

---
 .../apache/spark/sql/SparkSessionE2ESuite.scala | 17 +++++++----------
 .../connect/execution/ExecuteThreadRunner.scala | 17 +++++++++++++----
 .../sql/connect/service/ExecuteHolder.scala     |  4 +++-
 .../sql/connect/service/SessionHolder.scala     | 15 +++++++++------
 4 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala
index 61e196abffe0..5afafaaa6b92 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala
@@ -74,7 +74,7 @@ class SparkSessionE2ESuite extends RemoteSparkSession {
       assert(q1Interrupted)
       assert(q2Interrupted)
     }
-    assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.")
+    assert(interrupted.length == 2, s"Interrupted operations: $interrupted.")
   }
 
   test("interrupt all - foreground queries, background interrupt") {
@@ -103,7 +103,7 @@ class SparkSessionE2ESuite extends RemoteSparkSession {
     assert(e2.getMessage.contains("OPERATION_CANCELED"), s"Unexpected exception: $e2")
     finished = true
     assert(ThreadUtils.awaitResult(interruptor, 10.seconds))
-    assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.")
+    assert(interrupted.length == 2, s"Interrupted operations: $interrupted.")
   }
 
   test("interrupt tag") {
@@ -141,6 +141,7 @@ class SparkSessionE2ESuite extends RemoteSparkSession {
       spark.addTag("two")
       spark.addTag("one")
      spark.addTag("two") // duplicates shouldn't matter
+      assert(spark.getTags() == Set("one", "two"))
       try {
         spark
           .range(10)
@@ -196,9 +197,7 @@ class SparkSessionE2ESuite extends RemoteSparkSession {
     eventually(timeout(20.seconds), interval(1.seconds)) {
       val ids = spark.interruptTag("two")
       interrupted ++= ids
-      assert(
-        interrupted.distinct.length == 2,
-        s"Interrupted operations: ${interrupted.distinct}.")
+      assert(interrupted.length == 2, s"Interrupted operations: $interrupted.")
     }
     val e2 = intercept[SparkException] {
       ThreadUtils.awaitResult(q2, 1.minute)
@@ -208,16 +207,14 @@ class SparkSessionE2ESuite extends RemoteSparkSession {
       ThreadUtils.awaitResult(q3, 1.minute)
     }
     assert(e3.getCause.getMessage contains "OPERATION_CANCELED")
-    assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.")
+    assert(interrupted.length == 2, s"Interrupted operations: $interrupted.")
 
     // q1 and q4 should be cancelled
     interrupted.clear()
     eventually(timeout(20.seconds), interval(1.seconds)) {
       val ids = spark.interruptTag("one")
       interrupted ++= ids
-      assert(
-        interrupted.distinct.length == 2,
-        s"Interrupted operations: ${interrupted.distinct}.")
+      assert(interrupted.length == 2, s"Interrupted operations: $interrupted.")
     }
     val e1 = intercept[SparkException] {
       ThreadUtils.awaitResult(q1, 1.minute)
@@ -227,7 +224,7 @@ class SparkSessionE2ESuite extends RemoteSparkSession {
       ThreadUtils.awaitResult(q4, 1.minute)
     }
     assert(e4.getCause.getMessage contains "OPERATION_CANCELED")
-    assert(interrupted.distinct.length == 2, s"Interrupted operations: ${interrupted.distinct}.")
+    assert(interrupted.length == 2, s"Interrupted operations: $interrupted.")
   }
 
   test("interrupt operation") {
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala
index d26f132371a6..6758df0d7e6d 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala
@@ -54,11 +54,20 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends
     executionThread.join()
   }
 
-  /** Interrupt the executing thread. */
-  def interrupt(): Unit = {
+  /**
+   * Interrupt the executing thread.
+   * @return
+   *   true if it was not interrupted before, false if it was already interrupted.
+   */
+  def interrupt(): Boolean = {
     synchronized {
-      interrupted = true
-      executionThread.interrupt()
+      if (!interrupted) {
+        interrupted = true
+        executionThread.interrupt()
+        true
+      } else {
+        false
+      }
     }
   }
 
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala
index 6e405262683c..74530ad032f1 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala
@@ -106,8 +106,10 @@ private[connect] class ExecuteHolder(
   /**
    * Interrupt the execution. Interrupts the running thread, which cancels all running Spark Jobs
    * and makes the execution throw an OPERATION_CANCELED error.
+   * @return
+   *   true if it was not interrupted before, false if it was already interrupted.
    */
-  def interrupt(): Unit = {
+  def interrupt(): Boolean = {
     runner.interrupt()
   }
 
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala
index bfdffe69ea16..ae53d1d171f0 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala
@@ -94,8 +94,9 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio
   private[connect] def interruptAll(): Seq[String] = {
     val interruptedIds = new mutable.ArrayBuffer[String]()
     executions.asScala.values.foreach { execute =>
-      interruptedIds += execute.operationId
-      execute.interrupt()
+      if (execute.interrupt()) {
+        interruptedIds += execute.operationId
+      }
     }
     interruptedIds.toSeq
   }
@@ -109,8 +110,9 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio
     val interruptedIds = new mutable.ArrayBuffer[String]()
     executions.asScala.values.foreach { execute =>
       if (execute.sparkSessionTags.contains(tag)) {
-        interruptedIds += execute.operationId
-        execute.interrupt()
+        if (execute.interrupt()) {
+          interruptedIds += execute.operationId
+        }
       }
     }
     interruptedIds.toSeq
@@ -124,8 +126,9 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio
   private[connect] def interruptOperation(operationId: String): Seq[String] = {
     val interruptedIds = new mutable.ArrayBuffer[String]()
     Option(executions.get(operationId)).foreach { execute =>
-      interruptedIds += execute.operationId
-      execute.interrupt()
+      if (execute.interrupt()) {
+        interruptedIds += execute.operationId
+      }
     }
     interruptedIds.toSeq
   }
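
Note on "[PATCH 19/21] fix scala 2.13": the snippet below is an illustrative sketch, not part of the patch series. Under Scala 2.13 the default Seq alias points to scala.collection.immutable.Seq, while asScala on a java.util.List yields a mutable Buffer, so returning it from a method declared to return Seq[String] no longer type-checks; the explicit .toSeq copy compiles under both 2.12 and 2.13.

    import java.util.{Arrays => JArrays, List => JList}
    import scala.collection.JavaConverters._

    object Scala213SeqSketch {
      // With Scala 2.13, `Seq[String]` below means immutable.Seq[String];
      // `asScala` alone produces a mutable.Buffer and would not compile,
      // hence the `.toSeq` copy (effectively a no-op under 2.12).
      def interruptedIds(ids: JList[String]): Seq[String] =
        ids.asScala.toSeq

      def main(args: Array[String]): Unit =
        println(interruptedIds(JArrays.asList("op-1", "op-2")))
    }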
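
Note on "[PATCH 20/21] regenerate python protos": the regenerated stub documents that tags cannot contain the ',' character and cannot be empty strings. A minimal client-side check reflecting that documented constraint could look like the hypothetical helper below; the names are illustrative and this is not the validation code used by the server.

    object TagValidationSketch {
      // Hypothetical helper mirroring the documented constraint on execution tags:
      // a tag must be a non-empty string and must not contain the ',' character.
      def requireValidTag(tag: String): Unit = {
        require(tag != null && tag.nonEmpty, "Spark Connect tag must not be empty")
        require(!tag.contains(","), s"Spark Connect tag must not contain ',': '$tag'")
      }
    }

    // Usage: TagValidationSketch.requireValidTag("nightly-report")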
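
Finally, a usage sketch of the tag API these patches expose on the Spark Connect Scala client (addTag, getTags, interruptTag; see the MiMa exclusions in PATCH 18/21 and the "interrupt tag" test above). The session value, tag name and sleep are illustrative assumptions; as in the test, the tag is added on the thread that runs the query.

    import scala.concurrent.{ExecutionContext, Future}
    import org.apache.spark.sql.SparkSession

    object TagInterruptSketch {
      // `spark` is assumed to be an already-connected Spark Connect client session.
      def runAndCancel(spark: SparkSession)(implicit ec: ExecutionContext): Seq[String] = {
        // Tag the execution from the thread that starts it, as the E2E test does.
        val query = Future {
          spark.addTag("nightly-report")
          spark.range(1000000000L).count() // long-running query to be interrupted
        }

        Thread.sleep(2000) // crude stand-in for the test's `eventually` retry loop

        // Interrupt every execution in this session carrying the tag; after
        // PATCH 21/21 each interrupted operation id is reported at most once.
        val interruptedIds = spark.interruptTag("nightly-report")
        println(s"interrupted: $interruptedIds; query future: $query")
        interruptedIds
      }
    }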