diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.py b/python/pyspark/sql/connect/proto/expressions_pb2.py index e9b5feda268f..1d829e87eeb2 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.py +++ b/python/pyspark/sql/connect/proto/expressions_pb2.py @@ -40,7 +40,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\x90\x39\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12g\n\x1atyped_aggregate_expression\x18\x14 \x01(\x0b\x32\'.spark.connect.TypedAggregateExpressionH\x00R\x18typedAggregateExpression\x12T\n\x13subquery_expression\x18\x15 \x01(\x0b\x32!.spark.connect.SubqueryExpressionH\x00R\x12subqueryExpression\x12s\n\x1b\x64irect_shuffle_partition_id\x18\x16 \x01(\x0b\x32\x32.spark.connect.Expression.DirectShufflePartitionIDH\x00R\x18\x64irectShufflePartitionId\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1aK\n\x18\x44irectShufflePartitionID\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9c\x12\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x12\x61\n\x11specialized_array\x18\x19 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.SpecializedArrayH\x00R\x10specializedArray\x12<\n\x04time\x18\x1a \x01(\x0b\x32&.spark.connect.Expression.Literal.TimeH\x00R\x04time\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\xc2\x01\n\x05\x41rray\x12>\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x12:\n\tdata_type\x18\x03 \x01(\x0b\x32\x1d.spark.connect.DataType.ArrayR\x08\x64\x61taType\x1a\xa5\x02\n\x03Map\x12\x36\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\x07keyType\x12:\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x38\n\tdata_type\x18\x05 \x01(\x0b\x32\x1b.spark.connect.DataType.MapR\x08\x64\x61taType\x1a\xcf\x01\n\x06Struct\x12<\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x12H\n\x10\x64\x61ta_type_struct\x18\x03 \x01(\x0b\x32\x1e.spark.connect.DataType.StructR\x0e\x64\x61taTypeStruct\x1a\xc0\x02\n\x10SpecializedArray\x12,\n\x05\x62ools\x18\x01 \x01(\x0b\x32\x14.spark.connect.BoolsH\x00R\x05\x62ools\x12)\n\x04ints\x18\x02 \x01(\x0b\x32\x13.spark.connect.IntsH\x00R\x04ints\x12,\n\x05longs\x18\x03 \x01(\x0b\x32\x14.spark.connect.LongsH\x00R\x05longs\x12/\n\x06\x66loats\x18\x04 \x01(\x0b\x32\x15.spark.connect.FloatsH\x00R\x06\x66loats\x12\x32\n\x07\x64oubles\x18\x05 \x01(\x0b\x32\x16.spark.connect.DoublesH\x00R\x07\x64oubles\x12\x32\n\x07strings\x18\x06 \x01(\x0b\x32\x16.spark.connect.StringsH\x00R\x07stringsB\x0c\n\nvalue_type\x1aK\n\x04Time\x12\x12\n\x04nano\x18\x01 \x01(\x03R\x04nano\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x42\x0c\n\n_precisionB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\x82\x02\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x12$\n\x0bis_internal\x18\x05 \x01(\x08H\x00R\nisInternal\x88\x01\x01\x42\x0e\n\x0c_is_internal\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\x8d\x03\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdf\x12\x1f\n\x0bis_distinct\x18\x07 \x01(\x08R\nisDistinctB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"c\n\x18TypedAggregateExpression\x12G\n\x10scalar_scala_udf\x18\x01 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFR\x0escalarScalaUdf"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_condition"\xc5\x05\n\x12SubqueryExpression\x12\x17\n\x07plan_id\x18\x01 \x01(\x03R\x06planId\x12S\n\rsubquery_type\x18\x02 \x01(\x0e\x32..spark.connect.SubqueryExpression.SubqueryTypeR\x0csubqueryType\x12\x62\n\x11table_arg_options\x18\x03 \x01(\x0b\x32\x31.spark.connect.SubqueryExpression.TableArgOptionsH\x00R\x0ftableArgOptions\x88\x01\x01\x12G\n\x12in_subquery_values\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x10inSubqueryValues\x1a\xea\x01\n\x0fTableArgOptions\x12@\n\x0epartition_spec\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12\x37\n\x15with_single_partition\x18\x03 \x01(\x08H\x00R\x13withSinglePartition\x88\x01\x01\x42\x18\n\x16_with_single_partition"\x90\x01\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x12\x1b\n\x17SUBQUERY_TYPE_TABLE_ARG\x10\x03\x12\x14\n\x10SUBQUERY_TYPE_IN\x10\x04\x42\x14\n\x12_table_arg_optionsB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\x86\x38\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12g\n\x1atyped_aggregate_expression\x18\x14 \x01(\x0b\x32\'.spark.connect.TypedAggregateExpressionH\x00R\x18typedAggregateExpression\x12T\n\x13subquery_expression\x18\x15 \x01(\x0b\x32!.spark.connect.SubqueryExpressionH\x00R\x12subqueryExpression\x12s\n\x1b\x64irect_shuffle_partition_id\x18\x16 \x01(\x0b\x32\x32.spark.connect.Expression.DirectShufflePartitionIDH\x00R\x18\x64irectShufflePartitionId\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1aK\n\x18\x44irectShufflePartitionID\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x92\x11\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x12\x61\n\x11specialized_array\x18\x19 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.SpecializedArrayH\x00R\x10specializedArray\x12<\n\x04time\x18\x1a \x01(\x0b\x32&.spark.connect.Expression.Literal.TimeH\x00R\x04time\x12\x34\n\tdata_type\x18\x64 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x08\x64\x61taType\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x86\x01\n\x05\x41rray\x12>\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xeb\x01\n\x03Map\x12\x36\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\x07keyType\x12:\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x85\x01\n\x06Struct\x12<\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xc0\x02\n\x10SpecializedArray\x12,\n\x05\x62ools\x18\x01 \x01(\x0b\x32\x14.spark.connect.BoolsH\x00R\x05\x62ools\x12)\n\x04ints\x18\x02 \x01(\x0b\x32\x13.spark.connect.IntsH\x00R\x04ints\x12,\n\x05longs\x18\x03 \x01(\x0b\x32\x14.spark.connect.LongsH\x00R\x05longs\x12/\n\x06\x66loats\x18\x04 \x01(\x0b\x32\x15.spark.connect.FloatsH\x00R\x06\x66loats\x12\x32\n\x07\x64oubles\x18\x05 \x01(\x0b\x32\x16.spark.connect.DoublesH\x00R\x07\x64oubles\x12\x32\n\x07strings\x18\x06 \x01(\x0b\x32\x16.spark.connect.StringsH\x00R\x07stringsB\x0c\n\nvalue_type\x1aK\n\x04Time\x12\x12\n\x04nano\x18\x01 \x01(\x03R\x04nano\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x42\x0c\n\n_precisionB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\x82\x02\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x12$\n\x0bis_internal\x18\x05 \x01(\x08H\x00R\nisInternal\x88\x01\x01\x42\x0e\n\x0c_is_internal\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\x8d\x03\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdf\x12\x1f\n\x0bis_distinct\x18\x07 \x01(\x08R\nisDistinctB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"c\n\x18TypedAggregateExpression\x12G\n\x10scalar_scala_udf\x18\x01 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFR\x0escalarScalaUdf"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_condition"\xc5\x05\n\x12SubqueryExpression\x12\x17\n\x07plan_id\x18\x01 \x01(\x03R\x06planId\x12S\n\rsubquery_type\x18\x02 \x01(\x0e\x32..spark.connect.SubqueryExpression.SubqueryTypeR\x0csubqueryType\x12\x62\n\x11table_arg_options\x18\x03 \x01(\x0b\x32\x31.spark.connect.SubqueryExpression.TableArgOptionsH\x00R\x0ftableArgOptions\x88\x01\x01\x12G\n\x12in_subquery_values\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x10inSubqueryValues\x1a\xea\x01\n\x0fTableArgOptions\x12@\n\x0epartition_spec\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12\x37\n\x15with_single_partition\x18\x03 \x01(\x08H\x00R\x13withSinglePartition\x88\x01\x01\x42\x18\n\x16_with_single_partition"\x90\x01\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x12\x1b\n\x17SUBQUERY_TYPE_TABLE_ARG\x10\x03\x12\x14\n\x10SUBQUERY_TYPE_IN\x10\x04\x42\x14\n\x12_table_arg_optionsB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _globals = globals() @@ -68,7 +68,7 @@ "struct_type" ]._serialized_options = b"\030\001" _globals["_EXPRESSION"]._serialized_start = 133 - _globals["_EXPRESSION"]._serialized_end = 7445 + _globals["_EXPRESSION"]._serialized_end = 7307 _globals["_EXPRESSION_WINDOW"]._serialized_start = 2103 _globals["_EXPRESSION_WINDOW"]._serialized_end = 2886 _globals["_EXPRESSION_WINDOW_WINDOWFRAME"]._serialized_start = 2393 @@ -90,67 +90,67 @@ _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_start = 3595 _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_end = 3693 _globals["_EXPRESSION_LITERAL"]._serialized_start = 3712 - _globals["_EXPRESSION_LITERAL"]._serialized_end = 6044 - _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_start = 4708 - _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_end = 4825 - _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_start = 4827 - _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_end = 4925 - _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_start = 4928 - _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_end = 5122 - _globals["_EXPRESSION_LITERAL_MAP"]._serialized_start = 5125 - _globals["_EXPRESSION_LITERAL_MAP"]._serialized_end = 5418 - _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_start = 5421 - _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_end = 5628 - _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_start = 5631 - _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_end = 5951 - _globals["_EXPRESSION_LITERAL_TIME"]._serialized_start = 5953 - _globals["_EXPRESSION_LITERAL_TIME"]._serialized_end = 6028 - _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_start = 6047 - _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_end = 6233 - _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_start = 6236 - _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_end = 6494 - _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_start = 6496 - _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_end = 6546 - _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_start = 6548 - _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_end = 6672 - _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_start = 6674 - _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_end = 6760 - _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_start = 6763 - _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_end = 6895 - _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_start = 6898 - _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_end = 7085 - _globals["_EXPRESSION_ALIAS"]._serialized_start = 7087 - _globals["_EXPRESSION_ALIAS"]._serialized_end = 7207 - _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_start = 7210 - _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_end = 7368 - _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_start = 7370 - _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_end = 7432 - _globals["_EXPRESSIONCOMMON"]._serialized_start = 7447 - _globals["_EXPRESSIONCOMMON"]._serialized_end = 7512 - _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_start = 7515 - _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_end = 7912 - _globals["_PYTHONUDF"]._serialized_start = 7915 - _globals["_PYTHONUDF"]._serialized_end = 8119 - _globals["_SCALARSCALAUDF"]._serialized_start = 8122 - _globals["_SCALARSCALAUDF"]._serialized_end = 8336 - _globals["_JAVAUDF"]._serialized_start = 8339 - _globals["_JAVAUDF"]._serialized_end = 8488 - _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_start = 8490 - _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_end = 8589 - _globals["_CALLFUNCTION"]._serialized_start = 8591 - _globals["_CALLFUNCTION"]._serialized_end = 8699 - _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_start = 8701 - _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_end = 8793 - _globals["_MERGEACTION"]._serialized_start = 8796 - _globals["_MERGEACTION"]._serialized_end = 9308 - _globals["_MERGEACTION_ASSIGNMENT"]._serialized_start = 9018 - _globals["_MERGEACTION_ASSIGNMENT"]._serialized_end = 9124 - _globals["_MERGEACTION_ACTIONTYPE"]._serialized_start = 9127 - _globals["_MERGEACTION_ACTIONTYPE"]._serialized_end = 9294 - _globals["_SUBQUERYEXPRESSION"]._serialized_start = 9311 - _globals["_SUBQUERYEXPRESSION"]._serialized_end = 10020 - _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_start = 9617 - _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_end = 9851 - _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_start = 9854 - _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_end = 9998 + _globals["_EXPRESSION_LITERAL"]._serialized_end = 5906 + _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_start = 4762 + _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_end = 4879 + _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_start = 4881 + _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_end = 4979 + _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_start = 4982 + _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_end = 5116 + _globals["_EXPRESSION_LITERAL_MAP"]._serialized_start = 5119 + _globals["_EXPRESSION_LITERAL_MAP"]._serialized_end = 5354 + _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_start = 5357 + _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_end = 5490 + _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_start = 5493 + _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_end = 5813 + _globals["_EXPRESSION_LITERAL_TIME"]._serialized_start = 5815 + _globals["_EXPRESSION_LITERAL_TIME"]._serialized_end = 5890 + _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_start = 5909 + _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_end = 6095 + _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_start = 6098 + _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_end = 6356 + _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_start = 6358 + _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_end = 6408 + _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_start = 6410 + _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_end = 6534 + _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_start = 6536 + _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_end = 6622 + _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_start = 6625 + _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_end = 6757 + _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_start = 6760 + _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_end = 6947 + _globals["_EXPRESSION_ALIAS"]._serialized_start = 6949 + _globals["_EXPRESSION_ALIAS"]._serialized_end = 7069 + _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_start = 7072 + _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_end = 7230 + _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_start = 7232 + _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_end = 7294 + _globals["_EXPRESSIONCOMMON"]._serialized_start = 7309 + _globals["_EXPRESSIONCOMMON"]._serialized_end = 7374 + _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_start = 7377 + _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_end = 7774 + _globals["_PYTHONUDF"]._serialized_start = 7777 + _globals["_PYTHONUDF"]._serialized_end = 7981 + _globals["_SCALARSCALAUDF"]._serialized_start = 7984 + _globals["_SCALARSCALAUDF"]._serialized_end = 8198 + _globals["_JAVAUDF"]._serialized_start = 8201 + _globals["_JAVAUDF"]._serialized_end = 8350 + _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_start = 8352 + _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_end = 8451 + _globals["_CALLFUNCTION"]._serialized_start = 8453 + _globals["_CALLFUNCTION"]._serialized_end = 8561 + _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_start = 8563 + _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_end = 8655 + _globals["_MERGEACTION"]._serialized_start = 8658 + _globals["_MERGEACTION"]._serialized_end = 9170 + _globals["_MERGEACTION_ASSIGNMENT"]._serialized_start = 8880 + _globals["_MERGEACTION_ASSIGNMENT"]._serialized_end = 8986 + _globals["_MERGEACTION_ACTIONTYPE"]._serialized_start = 8989 + _globals["_MERGEACTION_ACTIONTYPE"]._serialized_end = 9156 + _globals["_SUBQUERYEXPRESSION"]._serialized_start = 9173 + _globals["_SUBQUERYEXPRESSION"]._serialized_end = 9882 + _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_start = 9479 + _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_end = 9713 + _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_start = 9716 + _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_end = 9860 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.pyi b/python/pyspark/sql/connect/proto/expressions_pb2.pyi index 020aac8c2d0a..e2e23dd8c553 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.pyi +++ b/python/pyspark/sql/connect/proto/expressions_pb2.pyi @@ -496,7 +496,6 @@ class Expression(google.protobuf.message.Message): ELEMENT_TYPE_FIELD_NUMBER: builtins.int ELEMENTS_FIELD_NUMBER: builtins.int - DATA_TYPE_FIELD_NUMBER: builtins.int @property def element_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: """(Deprecated) The element type of the array. @@ -509,42 +508,20 @@ class Expression(google.protobuf.message.Message): ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___Expression.Literal ]: - """The literal values that make up the array elements. - - For inferring the data_type.element_type, only the first element needs to - contain the type information. - """ - @property - def data_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Array: - """The type of the array. You don't need to set this field if the type information is not needed. - - If the element type can be inferred from the first element of the elements field, - then you don't need to set data_type.element_type to save space. - - On the other hand, redundant type information is also acceptable. - """ + """The literal values that make up the array elements.""" def __init__( self, *, element_type: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., elements: collections.abc.Iterable[global___Expression.Literal] | None = ..., - data_type: pyspark.sql.connect.proto.types_pb2.DataType.Array | None = ..., ) -> None: ... def HasField( - self, - field_name: typing_extensions.Literal[ - "data_type", b"data_type", "element_type", b"element_type" - ], + self, field_name: typing_extensions.Literal["element_type", b"element_type"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ - "data_type", - b"data_type", - "element_type", - b"element_type", - "elements", - b"elements", + "element_type", b"element_type", "elements", b"elements" ], ) -> None: ... @@ -555,7 +532,6 @@ class Expression(google.protobuf.message.Message): VALUE_TYPE_FIELD_NUMBER: builtins.int KEYS_FIELD_NUMBER: builtins.int VALUES_FIELD_NUMBER: builtins.int - DATA_TYPE_FIELD_NUMBER: builtins.int @property def key_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: """(Deprecated) The key type of the map. @@ -575,31 +551,14 @@ class Expression(google.protobuf.message.Message): ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___Expression.Literal ]: - """The literal keys that make up the map. - - For inferring the data_type.key_type, only the first key needs to - contain the type information. - """ + """The literal keys that make up the map.""" @property def values( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___Expression.Literal ]: - """The literal values that make up the map. - - For inferring the data_type.value_type, only the first value needs to - contain the type information. - """ - @property - def data_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Map: - """The type of the map. You don't need to set this field if the type information is not needed. - - If the key/value types can be inferred from the first element of the keys/values fields, - then you don't need to set data_type.key_type/data_type.value_type to save space. - - On the other hand, redundant type information is also acceptable. - """ + """The literal values that make up the map.""" def __init__( self, *, @@ -607,19 +566,16 @@ class Expression(google.protobuf.message.Message): value_type: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., keys: collections.abc.Iterable[global___Expression.Literal] | None = ..., values: collections.abc.Iterable[global___Expression.Literal] | None = ..., - data_type: pyspark.sql.connect.proto.types_pb2.DataType.Map | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ - "data_type", b"data_type", "key_type", b"key_type", "value_type", b"value_type" + "key_type", b"key_type", "value_type", b"value_type" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ - "data_type", - b"data_type", "key_type", b"key_type", "keys", @@ -636,13 +592,12 @@ class Expression(google.protobuf.message.Message): STRUCT_TYPE_FIELD_NUMBER: builtins.int ELEMENTS_FIELD_NUMBER: builtins.int - DATA_TYPE_STRUCT_FIELD_NUMBER: builtins.int @property def struct_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: """(Deprecated) The type of the struct. This field is deprecated since Spark 4.1+ because using DataType as the type of a struct - is ambiguous. Use data_type_struct field instead. + is ambiguous. Use data_type field instead. """ @property def elements( @@ -650,36 +605,20 @@ class Expression(google.protobuf.message.Message): ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___Expression.Literal ]: - """(Required) The literal values that make up the struct elements.""" - @property - def data_type_struct(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Struct: - """The type of the struct. You don't need to set this field if the type information is not needed. - - Whether data_type_struct.fields.data_type should be set depends on - whether each field's type can be inferred from the elements field. - """ + """The literal values that make up the struct elements.""" def __init__( self, *, struct_type: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., elements: collections.abc.Iterable[global___Expression.Literal] | None = ..., - data_type_struct: pyspark.sql.connect.proto.types_pb2.DataType.Struct | None = ..., ) -> None: ... def HasField( - self, - field_name: typing_extensions.Literal[ - "data_type_struct", b"data_type_struct", "struct_type", b"struct_type" - ], + self, field_name: typing_extensions.Literal["struct_type", b"struct_type"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ - "data_type_struct", - b"data_type_struct", - "elements", - b"elements", - "struct_type", - b"struct_type", + "elements", b"elements", "struct_type", b"struct_type" ], ) -> None: ... @@ -811,6 +750,7 @@ class Expression(google.protobuf.message.Message): STRUCT_FIELD_NUMBER: builtins.int SPECIALIZED_ARRAY_FIELD_NUMBER: builtins.int TIME_FIELD_NUMBER: builtins.int + DATA_TYPE_FIELD_NUMBER: builtins.int @property def null(self) -> pyspark.sql.connect.proto.types_pb2.DataType: ... binary: builtins.bytes @@ -844,6 +784,14 @@ class Expression(google.protobuf.message.Message): def specialized_array(self) -> global___Expression.Literal.SpecializedArray: ... @property def time(self) -> global___Expression.Literal.Time: ... + @property + def data_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: + """Data type information for the literal. + This field is required only in the root literal message for null values or + for data types (e.g., array, map, or struct) with non-trivial information. + If the data_type field is not set at the root level, the data type will be + inferred or retrieved from the deprecated data type fields using best efforts. + """ def __init__( self, *, @@ -869,6 +817,7 @@ class Expression(google.protobuf.message.Message): struct: global___Expression.Literal.Struct | None = ..., specialized_array: global___Expression.Literal.SpecializedArray | None = ..., time: global___Expression.Literal.Time | None = ..., + data_type: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., ) -> None: ... def HasField( self, @@ -883,6 +832,8 @@ class Expression(google.protobuf.message.Message): b"byte", "calendar_interval", b"calendar_interval", + "data_type", + b"data_type", "date", b"date", "day_time_interval", @@ -934,6 +885,8 @@ class Expression(google.protobuf.message.Message): b"byte", "calendar_interval", b"calendar_interval", + "data_type", + b"data_type", "date", b"date", "day_time_interval", diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ColumnNodeToProtoConverterSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ColumnNodeToProtoConverterSuite.scala index 90da125b49ff..389b3a5c52ac 100644 --- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ColumnNodeToProtoConverterSuite.scala +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ColumnNodeToProtoConverterSuite.scala @@ -78,23 +78,17 @@ class ColumnNodeToProtoConverterSuite extends ConnectFunSuite { testConversion( Literal((12.0, "north", 60.0, "west"), Option(dataType)), expr { b => - val builder = b.getLiteralBuilder.getStructBuilder - builder + b.getLiteralBuilder.getStructBuilder .addElements(proto.Expression.Literal.newBuilder().setDouble(12.0).build()) - builder .addElements(proto.Expression.Literal.newBuilder().setString("north").build()) - builder .addElements(proto.Expression.Literal.newBuilder().setDouble(60.0).build()) - builder .addElements(proto.Expression.Literal.newBuilder().setString("west").build()) - builder.setDataTypeStruct( + b.getLiteralBuilder.getDataTypeBuilder.setStruct( proto.DataType.Struct .newBuilder() - .addFields( - proto.DataType.StructField.newBuilder().setName("_1").setNullable(true).build()) + .addFields(structField("_1", ProtoDataTypes.DoubleType)) .addFields(structField("_2", stringTypeWithCollation)) - .addFields( - proto.DataType.StructField.newBuilder().setName("_3").setNullable(true).build()) + .addFields(structField("_3", ProtoDataTypes.DoubleType)) .addFields(structField("_4", stringTypeWithCollation)) .build()) }) diff --git a/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto b/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto index f2f0226ba42c..9efbf609cbba 100644 --- a/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto +++ b/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto @@ -207,6 +207,13 @@ message Expression { Time time = 26; } + // Data type information for the literal. + // This field is required only in the root literal message for null values or + // for data types (e.g., array, map, or struct) with non-trivial information. + // If the data_type field is not set at the root level, the data type will be + // inferred or retrieved from the deprecated data type fields using best efforts. + DataType data_type = 100; + message Decimal { // the string representation. string value = 1; @@ -230,18 +237,7 @@ message Expression { DataType element_type = 1 [deprecated = true]; // The literal values that make up the array elements. - // - // For inferring the data_type.element_type, only the first element needs to - // contain the type information. repeated Literal elements = 2; - - // The type of the array. You don't need to set this field if the type information is not needed. - // - // If the element type can be inferred from the first element of the elements field, - // then you don't need to set data_type.element_type to save space. - // - // On the other hand, redundant type information is also acceptable. - DataType.Array data_type = 3; } message Map { @@ -257,41 +253,21 @@ message Expression { DataType value_type = 2 [deprecated = true]; // The literal keys that make up the map. - // - // For inferring the data_type.key_type, only the first key needs to - // contain the type information. repeated Literal keys = 3; // The literal values that make up the map. - // - // For inferring the data_type.value_type, only the first value needs to - // contain the type information. repeated Literal values = 4; - - // The type of the map. You don't need to set this field if the type information is not needed. - // - // If the key/value types can be inferred from the first element of the keys/values fields, - // then you don't need to set data_type.key_type/data_type.value_type to save space. - // - // On the other hand, redundant type information is also acceptable. - DataType.Map data_type = 5; } message Struct { // (Deprecated) The type of the struct. // // This field is deprecated since Spark 4.1+ because using DataType as the type of a struct - // is ambiguous. Use data_type_struct field instead. + // is ambiguous. Use data_type field instead. DataType struct_type = 1 [deprecated = true]; - // (Required) The literal values that make up the struct elements. + // The literal values that make up the struct elements. repeated Literal elements = 2; - - // The type of the struct. You don't need to set this field if the type information is not needed. - // - // Whether data_type_struct.fields.data_type should be set depends on - // whether each field's type can be inferred from the elements field. - DataType.Struct data_type_struct = 3; } message SpecializedArray { diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala index 5e45fbb932f4..419cc8e082af 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala @@ -154,7 +154,11 @@ object DataTypeProtoConverter { } } - def toConnectProtoType(t: DataType): proto.DataType = { + def toConnectProtoType(t: DataType, bytesToBinary: Boolean = false): proto.DataType = { + toConnectProtoTypeInternal(t, bytesToBinary) + } + + private def toConnectProtoTypeInternal(t: DataType, bytesToBinary: Boolean): proto.DataType = { t match { case NullType => ProtoDataTypes.NullType @@ -241,15 +245,22 @@ object DataTypeProtoConverter { .build() case ArrayType(elementType: DataType, containsNull: Boolean) => - proto.DataType - .newBuilder() - .setArray( - proto.DataType.Array - .newBuilder() - .setElementType(toConnectProtoType(elementType)) - .setContainsNull(containsNull) - .build()) - .build() + if (elementType == ByteType && bytesToBinary) { + proto.DataType + .newBuilder() + .setBinary(proto.DataType.Binary.newBuilder().build()) + .build() + } else { + proto.DataType + .newBuilder() + .setArray( + proto.DataType.Array + .newBuilder() + .setElementType(toConnectProtoTypeInternal(elementType, bytesToBinary)) + .setContainsNull(containsNull) + .build()) + .build() + } case StructType(fields: Array[StructField]) => val protoFields = fields.toImmutableArraySeq.map { @@ -262,14 +273,14 @@ object DataTypeProtoConverter { proto.DataType.StructField .newBuilder() .setName(name) - .setDataType(toConnectProtoType(dataType)) + .setDataType(toConnectProtoTypeInternal(dataType, bytesToBinary)) .setNullable(nullable) .build() } else { proto.DataType.StructField .newBuilder() .setName(name) - .setDataType(toConnectProtoType(dataType)) + .setDataType(toConnectProtoTypeInternal(dataType, bytesToBinary)) .setNullable(nullable) .setMetadata(metadata.json) .build() @@ -290,8 +301,8 @@ object DataTypeProtoConverter { .setMap( proto.DataType.Map .newBuilder() - .setKeyType(toConnectProtoType(keyType)) - .setValueType(toConnectProtoType(valueType)) + .setKeyType(toConnectProtoTypeInternal(keyType, bytesToBinary)) + .setValueType(toConnectProtoTypeInternal(valueType, bytesToBinary)) .setValueContainsNull(valueContainsNull) .build()) .build() @@ -307,7 +318,7 @@ object DataTypeProtoConverter { .newBuilder() .setType("udt") .setPythonClass(pyudt.pyUDT) - .setSqlType(toConnectProtoType(pyudt.sqlType)) + .setSqlType(toConnectProtoTypeInternal(pyudt.sqlType, bytesToBinary)) .setSerializedPythonClass(pyudt.serializedPyClass) .build()) .build() @@ -328,7 +339,7 @@ object DataTypeProtoConverter { builder .setType("udt") .setJvmClass(className) - .setSqlType(toConnectProtoType(udt.sqlType)) + .setSqlType(toConnectProtoTypeInternal(udt.sqlType, bytesToBinary)) if (udt.pyUDT != null) { builder.setPythonClass(udt.pyUDT) diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala index 3c07bd5851fb..84637a9c8aab 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala @@ -53,51 +53,9 @@ object LiteralValueProtoConverter { } } - private def setArrayTypeAfterAddingElements( - ab: proto.Expression.Literal.Array.Builder, - elementType: DataType, - containsNull: Boolean, - useDeprecatedDataTypeFields: Boolean, - needDataType: Boolean): Unit = { - if (useDeprecatedDataTypeFields) { - ab.setElementType(toConnectProtoType(elementType)) - } else if (needDataType) { - val dataTypeBuilder = proto.DataType.Array.newBuilder() - if (ab.getElementsCount == 0 || getInferredDataType(ab.getElements(0)).isEmpty) { - dataTypeBuilder.setElementType(toConnectProtoType(elementType)) - } - dataTypeBuilder.setContainsNull(containsNull) - ab.setDataType(dataTypeBuilder.build()) - } - } - - private def setMapTypeAfterAddingKeysAndValues( - mb: proto.Expression.Literal.Map.Builder, - keyType: DataType, - valueType: DataType, - valueContainsNull: Boolean, - useDeprecatedDataTypeFields: Boolean, - needDataType: Boolean): Unit = { - if (useDeprecatedDataTypeFields) { - mb.setKeyType(toConnectProtoType(keyType)) - mb.setValueType(toConnectProtoType(valueType)) - } else if (needDataType) { - val dataTypeBuilder = proto.DataType.Map.newBuilder() - if (mb.getKeysCount == 0 || getInferredDataType(mb.getKeys(0)).isEmpty) { - dataTypeBuilder.setKeyType(toConnectProtoType(keyType)) - } - if (mb.getValuesCount == 0 || getInferredDataType(mb.getValues(0)).isEmpty) { - dataTypeBuilder.setValueType(toConnectProtoType(valueType)) - } - dataTypeBuilder.setValueContainsNull(valueContainsNull) - mb.setDataType(dataTypeBuilder.build()) - } - } - private def toLiteralProtoBuilderInternal( literal: Any, - options: ToLiteralProtoOptions, - needDataType: Boolean): proto.Expression.Literal.Builder = { + options: ToLiteralProtoOptions): proto.Expression.Literal.Builder = { val builder = proto.Expression.Literal.newBuilder() def decimalBuilder(precision: Int, scale: Int, value: String) = { @@ -113,17 +71,12 @@ object LiteralValueProtoConverter { def arrayBuilder(array: Array[_]) = { val ab = builder.getArrayBuilder - var needElementType = needDataType array.foreach { x => - ab.addElements(toLiteralProtoBuilderInternal(x, options, needElementType).build()) - needElementType = false + ab.addElements(toLiteralProtoBuilderInternal(x, options).build()) + } + if (options.useDeprecatedDataTypeFields) { + ab.setElementType(toConnectProtoType(toDataType(array.getClass.getComponentType))) } - setArrayTypeAfterAddingElements( - ab, - toDataType(array.getClass.getComponentType), - containsNull = true, - options.useDeprecatedDataTypeFields, - needDataType) ab } @@ -143,9 +96,9 @@ object LiteralValueProtoConverter { case v: Char => builder.setString(v.toString) case v: Array[Char] => builder.setString(String.valueOf(v)) case v: Array[Byte] => builder.setBinary(ByteString.copyFrom(v)) - case v: mutable.ArraySeq[_] => toLiteralProtoBuilderInternal(v.array, options, needDataType) + case v: mutable.ArraySeq[_] => toLiteralProtoBuilderInternal(v.array, options) case v: immutable.ArraySeq[_] => - toLiteralProtoBuilderInternal(v.unsafeArray, options, needDataType) + toLiteralProtoBuilderInternal(v.unsafeArray, options) case v: LocalDate => builder.setDate(v.toEpochDay.toInt) case v: Decimal => builder.setDecimal(decimalBuilder(Math.max(v.precision, v.scale), v.scale, v.toString)) @@ -172,35 +125,26 @@ object LiteralValueProtoConverter { private def toLiteralProtoBuilderInternal( literal: Any, dataType: DataType, - options: ToLiteralProtoOptions, - needDataType: Boolean): proto.Expression.Literal.Builder = { + options: ToLiteralProtoOptions): proto.Expression.Literal.Builder = { val builder = proto.Expression.Literal.newBuilder() def arrayBuilder(scalaValue: Any, elementType: DataType, containsNull: Boolean) = { val ab = builder.getArrayBuilder - var needElementType = needDataType scalaValue match { case a: Array[_] => a.foreach { item => - ab.addElements( - toLiteralProtoBuilderInternal(item, elementType, options, needElementType).build()) - needElementType = false + ab.addElements(toLiteralProtoBuilderInternal(item, elementType, options).build()) } case s: scala.collection.Seq[_] => s.foreach { item => - ab.addElements( - toLiteralProtoBuilderInternal(item, elementType, options, needElementType).build()) - needElementType = false + ab.addElements(toLiteralProtoBuilderInternal(item, elementType, options).build()) } case other => throw new IllegalArgumentException(s"literal $other not supported (yet).") } - setArrayTypeAfterAddingElements( - ab, - elementType, - containsNull, - options.useDeprecatedDataTypeFields, - needDataType) + if (options.useDeprecatedDataTypeFields) { + ab.setElementType(toConnectProtoType(elementType)) + } ab } @@ -210,26 +154,19 @@ object LiteralValueProtoConverter { valueType: DataType, valueContainsNull: Boolean) = { val mb = builder.getMapBuilder - var needKeyAndValueType = needDataType scalaValue match { case map: scala.collection.Map[_, _] => map.foreach { case (k, v) => - mb.addKeys( - toLiteralProtoBuilderInternal(k, keyType, options, needKeyAndValueType).build()) - mb.addValues( - toLiteralProtoBuilderInternal(v, valueType, options, needKeyAndValueType).build()) - needKeyAndValueType = false + mb.addKeys(toLiteralProtoBuilderInternal(k, keyType, options).build()) + mb.addValues(toLiteralProtoBuilderInternal(v, valueType, options).build()) } case other => throw new IllegalArgumentException(s"literal $other not supported (yet).") } - setMapTypeAfterAddingKeysAndValues( - mb, - keyType, - valueType, - valueContainsNull, - options.useDeprecatedDataTypeFields, - needDataType) + if (options.useDeprecatedDataTypeFields) { + mb.setKeyType(toConnectProtoType(keyType)) + mb.setValueType(toConnectProtoType(valueType)) + } mb } @@ -248,40 +185,15 @@ object LiteralValueProtoConverter { } var idx = 0 + while (idx < structType.size) { + val field = fields(idx) + val literalProto = + toLiteralProtoBuilderInternal(iter.next(), field.dataType, options) + sb.addElements(literalProto) + idx += 1 + } if (options.useDeprecatedDataTypeFields) { - while (idx < structType.size) { - val field = fields(idx) - val literalProto = - toLiteralProtoWithOptions(iter.next(), Some(field.dataType), options) - sb.addElements(literalProto) - idx += 1 - } sb.setStructType(toConnectProtoType(structType)) - } else { - val dataTypeStruct = proto.DataType.Struct.newBuilder() - while (idx < structType.size) { - val field = fields(idx) - val literalProto = - toLiteralProtoWithOptions(iter.next(), Some(field.dataType), options) - sb.addElements(literalProto) - - val fieldBuilder = dataTypeStruct - .addFieldsBuilder() - .setName(field.name) - .setNullable(field.nullable) - - if (LiteralValueProtoConverter.getInferredDataType(literalProto).isEmpty) { - fieldBuilder.setDataType(toConnectProtoType(field.dataType)) - } - - // Set metadata if available - if (field.metadata != Metadata.empty) { - fieldBuilder.setMetadata(field.metadata.json) - } - - idx += 1 - } - sb.setDataTypeStruct(dataTypeStruct.build()) } sb @@ -290,18 +202,18 @@ object LiteralValueProtoConverter { (literal, dataType) match { case (v: Option[_], _) => if (v.isDefined) { - toLiteralProtoBuilderInternal(v.get, dataType, options, needDataType) + toLiteralProtoBuilderInternal(v.get, dataType, options) } else { - setNullValue(builder, dataType, needDataType) + setNullValue(builder, dataType, options.useDeprecatedDataTypeFields) } case (null, _) => - setNullValue(builder, dataType, needDataType) + setNullValue(builder, dataType, options.useDeprecatedDataTypeFields) case (v: mutable.ArraySeq[_], ArrayType(_, _)) => - toLiteralProtoBuilderInternal(v.array, dataType, options, needDataType) + toLiteralProtoBuilderInternal(v.array, dataType, options) case (v: immutable.ArraySeq[_], ArrayType(_, _)) => - toLiteralProtoBuilderInternal(v.unsafeArray, dataType, options, needDataType) + toLiteralProtoBuilderInternal(v.unsafeArray, dataType, options) case (v: Array[Byte], ArrayType(_, _)) => - toLiteralProtoBuilderInternal(v, options, needDataType) + toLiteralProtoBuilderInternal(v, options) case (v, ArrayType(elementType, containsNull)) => builder.setArray(arrayBuilder(v, elementType, containsNull)) case (v, MapType(keyType, valueType, valueContainsNull)) => @@ -313,7 +225,7 @@ object LiteralValueProtoConverter { builder.getTimeBuilder .setNano(SparkDateTimeUtils.localTimeToNanos(v)) .setPrecision(timeType.precision)) - case _ => toLiteralProtoBuilderInternal(literal, options, needDataType) + case _ => toLiteralProtoBuilderInternal(literal, options) } } @@ -325,20 +237,42 @@ object LiteralValueProtoConverter { * proto.Expression.Literal.Builder */ def toLiteralProtoBuilder(literal: Any): proto.Expression.Literal.Builder = { - toLiteralProtoBuilderInternal( + toLiteralProtoBuilderWithOptions( literal, - ToLiteralProtoOptions(useDeprecatedDataTypeFields = true), - needDataType = true) + None, + ToLiteralProtoOptions(useDeprecatedDataTypeFields = true)) } def toLiteralProtoBuilder( literal: Any, dataType: DataType): proto.Expression.Literal.Builder = { - toLiteralProtoBuilderInternal( + toLiteralProtoBuilderWithOptions( literal, - dataType, - ToLiteralProtoOptions(useDeprecatedDataTypeFields = true), - needDataType = true) + Some(dataType), + ToLiteralProtoOptions(useDeprecatedDataTypeFields = true)) + } + + private def setDataTypeForRootLiteral( + builder: proto.Expression.Literal.Builder, + dataType: DataType): proto.Expression.Literal.Builder = { + if (builder.getLiteralTypeCase == + proto.Expression.Literal.LiteralTypeCase.LITERALTYPE_NOT_SET) { + throw new IllegalArgumentException("Literal type should be set first") + } + // To be compatible with the current Scala behavior, we should convert bytes to binary. + val protoDataType = toConnectProtoType(dataType, bytesToBinary = true) + // If the value is not null and the data type is trivial, we don't need to + // set the data type field, because it will be inferred from the literal value, saving space. + val needDataType = protoDataType.getKindCase match { + case proto.DataType.KindCase.ARRAY => true + case proto.DataType.KindCase.STRUCT => true + case proto.DataType.KindCase.MAP => true + case _ => builder.getLiteralTypeCase == proto.Expression.Literal.LiteralTypeCase.NULL + } + if (needDataType) { + builder.setDataType(protoDataType) + } + builder } def toLiteralProtoBuilderWithOptions( @@ -347,10 +281,29 @@ object LiteralValueProtoConverter { options: ToLiteralProtoOptions): proto.Expression.Literal.Builder = { dataTypeOpt match { case Some(dataType) => - toLiteralProtoBuilderInternal(literal, dataType, options, needDataType = true) + val builder = toLiteralProtoBuilderInternal(literal, dataType, options) + if (!options.useDeprecatedDataTypeFields) { + setDataTypeForRootLiteral(builder, dataType) + } + builder case None => - toLiteralProtoBuilderInternal(literal, options, needDataType = true) + val builder = toLiteralProtoBuilderInternal(literal, options) + if (!options.useDeprecatedDataTypeFields) { + def unwrapArraySeq(value: Any): Any = value match { + case arraySeq: mutable.ArraySeq[_] => unwrapArraySeq(arraySeq.array) + case arraySeq: immutable.ArraySeq[_] => unwrapArraySeq(arraySeq.unsafeArray) + case _ => value + } + unwrapArraySeq(literal) match { + case null => + setDataTypeForRootLiteral(builder, NullType) + case value => + setDataTypeForRootLiteral(builder, toDataType(value.getClass)) + } + } + builder } + } def create[T: TypeTag](v: T): proto.Expression.Literal.Builder = Try { @@ -368,29 +321,25 @@ object LiteralValueProtoConverter { * @return * proto.Expression.Literal */ - def toLiteralProto(literal: Any): proto.Expression.Literal = - toLiteralProtoBuilderInternal( + def toLiteralProto(literal: Any): proto.Expression.Literal = { + toLiteralProtoWithOptions( literal, - ToLiteralProtoOptions(useDeprecatedDataTypeFields = true), - needDataType = true).build() + None, + ToLiteralProtoOptions(useDeprecatedDataTypeFields = true)) + } - def toLiteralProto(literal: Any, dataType: DataType): proto.Expression.Literal = - toLiteralProtoBuilderInternal( + def toLiteralProto(literal: Any, dataType: DataType): proto.Expression.Literal = { + toLiteralProtoWithOptions( literal, - dataType, - ToLiteralProtoOptions(useDeprecatedDataTypeFields = true), - needDataType = true).build() + Some(dataType), + ToLiteralProtoOptions(useDeprecatedDataTypeFields = true)) + } def toLiteralProtoWithOptions( literal: Any, dataTypeOpt: Option[DataType], options: ToLiteralProtoOptions): proto.Expression.Literal = { - dataTypeOpt match { - case Some(dataType) => - toLiteralProtoBuilderInternal(literal, dataType, options, needDataType = true).build() - case None => - toLiteralProtoBuilderInternal(literal, options, needDataType = true).build() - } + toLiteralProtoBuilderWithOptions(literal, dataTypeOpt, options).build() } private[sql] def toDataType(clz: Class[_]): DataType = clz match { @@ -408,6 +357,7 @@ object LiteralValueProtoConverter { case _ if clz == classOf[LocalDate] || clz == classOf[Date] => DateType case _ if clz == classOf[Instant] || clz == classOf[Timestamp] => TimestampType case _ if clz == classOf[LocalDateTime] => TimestampNTZType + case _ if clz == classOf[LocalTime] => TimeType(TimeType.DEFAULT_PRECISION) case _ if clz == classOf[Duration] => DayTimeIntervalType.DEFAULT case _ if clz == classOf[Period] => YearMonthIntervalType.DEFAULT case _ if clz == classOf[JBigDecimal] => DecimalType.SYSTEM_DEFAULT @@ -420,10 +370,13 @@ object LiteralValueProtoConverter { case _ if clz == classOf[JByte] => ByteType case _ if clz == classOf[JFloat] => FloatType case _ if clz == classOf[JBoolean] => BooleanType + case _ if clz == classOf[JChar] => StringType // other scala classes case _ if clz == classOf[String] => StringType - case _ if clz == classOf[BigInt] || clz == classOf[BigDecimal] => DecimalType.SYSTEM_DEFAULT + case _ if clz == classOf[BigInt] => DecimalType.SYSTEM_DEFAULT + case _ if clz == classOf[BigDecimal] => DecimalType.SYSTEM_DEFAULT + case _ if clz == classOf[Decimal] => DecimalType.SYSTEM_DEFAULT case _ if clz == classOf[CalendarInterval] => CalendarIntervalType case _ if clz.isArray => ArrayType(toDataType(clz.getComponentType)) case _ => @@ -431,71 +384,13 @@ object LiteralValueProtoConverter { } def toScalaValue(literal: proto.Expression.Literal): Any = { - literal.getLiteralTypeCase match { - case proto.Expression.Literal.LiteralTypeCase.NULL => null - - case proto.Expression.Literal.LiteralTypeCase.BINARY => literal.getBinary.toByteArray - - case proto.Expression.Literal.LiteralTypeCase.BOOLEAN => literal.getBoolean - - case proto.Expression.Literal.LiteralTypeCase.BYTE => literal.getByte.toByte - - case proto.Expression.Literal.LiteralTypeCase.SHORT => literal.getShort.toShort - - case proto.Expression.Literal.LiteralTypeCase.INTEGER => literal.getInteger - - case proto.Expression.Literal.LiteralTypeCase.LONG => literal.getLong - - case proto.Expression.Literal.LiteralTypeCase.FLOAT => literal.getFloat - - case proto.Expression.Literal.LiteralTypeCase.DOUBLE => literal.getDouble - - case proto.Expression.Literal.LiteralTypeCase.DECIMAL => - Decimal(literal.getDecimal.getValue) - - case proto.Expression.Literal.LiteralTypeCase.STRING => literal.getString - - case proto.Expression.Literal.LiteralTypeCase.DATE => - SparkDateTimeUtils.toJavaDate(literal.getDate) - - case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP => - SparkDateTimeUtils.toJavaTimestamp(literal.getTimestamp) - - case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP_NTZ => - SparkDateTimeUtils.microsToLocalDateTime(literal.getTimestampNtz) - - case proto.Expression.Literal.LiteralTypeCase.CALENDAR_INTERVAL => - new CalendarInterval( - literal.getCalendarInterval.getMonths, - literal.getCalendarInterval.getDays, - literal.getCalendarInterval.getMicroseconds) - - case proto.Expression.Literal.LiteralTypeCase.YEAR_MONTH_INTERVAL => - SparkIntervalUtils.monthsToPeriod(literal.getYearMonthInterval) - - case proto.Expression.Literal.LiteralTypeCase.DAY_TIME_INTERVAL => - SparkIntervalUtils.microsToDuration(literal.getDayTimeInterval) - - case proto.Expression.Literal.LiteralTypeCase.TIME => - SparkDateTimeUtils.nanosToLocalTime(literal.getTime.getNano) - - case proto.Expression.Literal.LiteralTypeCase.ARRAY => - toScalaArray(literal.getArray) - - case proto.Expression.Literal.LiteralTypeCase.MAP => - toScalaMap(literal.getMap) - - case proto.Expression.Literal.LiteralTypeCase.STRUCT => - toScalaStruct(literal.getStruct) - - case other => - throw new UnsupportedOperationException( - s"Unsupported Literal Type: ${other.getNumber} (${other.name})") - } + getScalaConverter(getProtoDataType(literal))(literal) } private def getScalaConverter(dataType: proto.DataType): proto.Expression.Literal => Any = { val converter: proto.Expression.Literal => Any = dataType.getKindCase match { + case proto.DataType.KindCase.NULL => + v => throw InvalidPlanInput(s"Expected null value, but got ${v.getLiteralTypeCase}") case proto.DataType.KindCase.SHORT => v => v.getShort.toShort case proto.DataType.KindCase.INTEGER => v => v.getInteger case proto.DataType.KindCase.LONG => v => v.getLong @@ -523,145 +418,194 @@ object LiteralValueProtoConverter { val interval = v.getCalendarInterval new CalendarInterval(interval.getMonths, interval.getDays, interval.getMicroseconds) case proto.DataType.KindCase.ARRAY => - v => toScalaArrayInternal(v.getArray, dataType.getArray) + v => toScalaArrayInternal(v, dataType.getArray) case proto.DataType.KindCase.MAP => - v => toScalaMapInternal(v.getMap, dataType.getMap) + v => toScalaMapInternal(v, dataType.getMap) case proto.DataType.KindCase.STRUCT => - v => toScalaStructInternal(v.getStruct, dataType.getStruct) + v => toScalaStructInternal(v, dataType.getStruct) case _ => throw InvalidPlanInput(s"Unsupported Literal Type: ${dataType.getKindCase}") } v => if (v.hasNull) null else converter(v) } - private def getInferredDataType( - literal: proto.Expression.Literal, - recursive: Boolean = false): Option[proto.DataType] = { - if (literal.hasNull) { - return Some(literal.getNull) + private def isCompatible( + literalTypeCase: proto.Expression.Literal.LiteralTypeCase, + dataTypeCase: proto.DataType.KindCase): Boolean = { + (literalTypeCase, dataTypeCase) match { + case (proto.Expression.Literal.LiteralTypeCase.NULL, _) => + true + case (proto.Expression.Literal.LiteralTypeCase.BINARY, proto.DataType.KindCase.BINARY) => + true + case (proto.Expression.Literal.LiteralTypeCase.BOOLEAN, proto.DataType.KindCase.BOOLEAN) => + true + case (proto.Expression.Literal.LiteralTypeCase.BYTE, proto.DataType.KindCase.BYTE) => + true + case (proto.Expression.Literal.LiteralTypeCase.SHORT, proto.DataType.KindCase.SHORT) => + true + case (proto.Expression.Literal.LiteralTypeCase.INTEGER, proto.DataType.KindCase.INTEGER) => + true + case (proto.Expression.Literal.LiteralTypeCase.LONG, proto.DataType.KindCase.LONG) => + true + case (proto.Expression.Literal.LiteralTypeCase.FLOAT, proto.DataType.KindCase.FLOAT) => + true + case (proto.Expression.Literal.LiteralTypeCase.DOUBLE, proto.DataType.KindCase.DOUBLE) => + true + case (proto.Expression.Literal.LiteralTypeCase.DECIMAL, proto.DataType.KindCase.DECIMAL) => + true + case (proto.Expression.Literal.LiteralTypeCase.STRING, proto.DataType.KindCase.STRING) => + true + case (proto.Expression.Literal.LiteralTypeCase.DATE, proto.DataType.KindCase.DATE) => + true + case ( + proto.Expression.Literal.LiteralTypeCase.TIMESTAMP, + proto.DataType.KindCase.TIMESTAMP) => + true + case ( + proto.Expression.Literal.LiteralTypeCase.TIMESTAMP_NTZ, + proto.DataType.KindCase.TIMESTAMP_NTZ) => + true + case ( + proto.Expression.Literal.LiteralTypeCase.CALENDAR_INTERVAL, + proto.DataType.KindCase.CALENDAR_INTERVAL) => + true + case ( + proto.Expression.Literal.LiteralTypeCase.DAY_TIME_INTERVAL, + proto.DataType.KindCase.DAY_TIME_INTERVAL) => + true + case ( + proto.Expression.Literal.LiteralTypeCase.YEAR_MONTH_INTERVAL, + proto.DataType.KindCase.YEAR_MONTH_INTERVAL) => + true + case (proto.Expression.Literal.LiteralTypeCase.TIME, proto.DataType.KindCase.TIME) => + true + case (proto.Expression.Literal.LiteralTypeCase.ARRAY, proto.DataType.KindCase.ARRAY) => + true + case (proto.Expression.Literal.LiteralTypeCase.MAP, proto.DataType.KindCase.MAP) => + true + case (proto.Expression.Literal.LiteralTypeCase.STRUCT, proto.DataType.KindCase.STRUCT) => + true + case _ => false } + } - val builder = proto.DataType.newBuilder() - literal.getLiteralTypeCase match { - case proto.Expression.Literal.LiteralTypeCase.BINARY => - builder.setBinary(proto.DataType.Binary.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.BOOLEAN => - builder.setBoolean(proto.DataType.Boolean.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.BYTE => - builder.setByte(proto.DataType.Byte.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.SHORT => - builder.setShort(proto.DataType.Short.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.INTEGER => - builder.setInteger(proto.DataType.Integer.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.LONG => - builder.setLong(proto.DataType.Long.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.FLOAT => - builder.setFloat(proto.DataType.Float.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.DOUBLE => - builder.setDouble(proto.DataType.Double.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.DATE => - builder.setDate(proto.DataType.Date.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP => - builder.setTimestamp(proto.DataType.Timestamp.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP_NTZ => - builder.setTimestampNtz(proto.DataType.TimestampNTZ.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.CALENDAR_INTERVAL => - builder.setCalendarInterval(proto.DataType.CalendarInterval.newBuilder.build()) - case proto.Expression.Literal.LiteralTypeCase.STRUCT => - if (recursive) { - val struct = literal.getStruct - val size = struct.getElementsCount - val structTypeBuilder = proto.DataType.Struct.newBuilder - var i = 0 - while (i < size) { - val field = struct.getDataTypeStruct.getFields(i) - if (field.hasDataType) { - structTypeBuilder.addFields(field) - } else { - val element = struct.getElements(i) - getInferredDataType(element, recursive = true) match { - case Some(dataType) => - val fieldBuilder = structTypeBuilder.addFieldsBuilder() - fieldBuilder.setName(field.getName) - fieldBuilder.setDataType(dataType) - fieldBuilder.setNullable(field.getNullable) - if (field.hasMetadata) { - fieldBuilder.setMetadata(field.getMetadata) - } - case None => return None - } + def getProtoDataType(literal: proto.Expression.Literal): proto.DataType = { + val dataType = if (literal.hasDataType) { + literal.getDataType + } else { + // For backward compatibility, we still support the old way to + // define the data type of the literal. + if (literal.getLiteralTypeCase == proto.Expression.Literal.LiteralTypeCase.NULL) { + literal.getNull + } else { + val builder = proto.DataType.newBuilder() + literal.getLiteralTypeCase match { + case proto.Expression.Literal.LiteralTypeCase.BINARY => + builder.setBinary(proto.DataType.Binary.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.BOOLEAN => + builder.setBoolean(proto.DataType.Boolean.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.BYTE => + builder.setByte(proto.DataType.Byte.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.SHORT => + builder.setShort(proto.DataType.Short.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.INTEGER => + builder.setInteger(proto.DataType.Integer.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.LONG => + builder.setLong(proto.DataType.Long.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.FLOAT => + builder.setFloat(proto.DataType.Float.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.DOUBLE => + builder.setDouble(proto.DataType.Double.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.DECIMAL => + val decimal = Decimal.apply(literal.getDecimal.getValue) + var precision = decimal.precision + if (literal.getDecimal.hasPrecision) { + precision = math.max(precision, literal.getDecimal.getPrecision) } - i += 1 - } - builder.setStruct(structTypeBuilder.build()) - } else { - builder.setStruct(proto.DataType.Struct.newBuilder.build()) - } - case proto.Expression.Literal.LiteralTypeCase.ARRAY => - if (recursive) { - val arrayType = literal.getArray.getDataType - val elementTypeOpt = if (arrayType.hasElementType) { - Some(arrayType.getElementType) - } else if (literal.getArray.getElementsCount > 0) { - getInferredDataType(literal.getArray.getElements(0), recursive = true) - } else { - None - } - if (elementTypeOpt.isDefined) { - builder.setArray( - proto.DataType.Array + var scale = decimal.scale + if (literal.getDecimal.hasScale) { + scale = math.max(scale, literal.getDecimal.getScale) + } + builder.setDecimal( + proto.DataType.Decimal .newBuilder() - .setElementType(elementTypeOpt.get) - .setContainsNull(arrayType.getContainsNull) - .build()) - } else { - return None - } - } else { - builder.setArray(proto.DataType.Array.newBuilder.build()) - } - case proto.Expression.Literal.LiteralTypeCase.MAP => - if (recursive) { - val mapType = literal.getMap.getDataType - val keyTypeOpt = if (mapType.hasKeyType) { - Some(mapType.getKeyType) - } else if (literal.getMap.getKeysCount > 0) { - getInferredDataType(literal.getMap.getKeys(0), recursive = true) - } else { - None - } - val valueTypeOpt = if (mapType.hasValueType) { - Some(mapType.getValueType) - } else if (literal.getMap.getValuesCount > 0) { - getInferredDataType(literal.getMap.getValues(0), recursive = true) - } else { - None - } - if (keyTypeOpt.isDefined && valueTypeOpt.isDefined) { - builder.setMap( - proto.DataType.Map.newBuilder - .setKeyType(keyTypeOpt.get) - .setValueType(valueTypeOpt.get) - .setValueContainsNull(mapType.getValueContainsNull) + .setPrecision(math.max(precision, scale)) + .setScale(scale) .build()) - } else { - return None - } - } else { - builder.setMap(proto.DataType.Map.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.STRING => + builder.setString(proto.DataType.String.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.DATE => + builder.setDate(proto.DataType.Date.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP => + builder.setTimestamp(proto.DataType.Timestamp.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP_NTZ => + builder.setTimestampNtz(proto.DataType.TimestampNTZ.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.CALENDAR_INTERVAL => + builder.setCalendarInterval(proto.DataType.CalendarInterval.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.YEAR_MONTH_INTERVAL => + builder.setYearMonthInterval(proto.DataType.YearMonthInterval.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.DAY_TIME_INTERVAL => + builder.setDayTimeInterval(proto.DataType.DayTimeInterval.newBuilder().build()) + case proto.Expression.Literal.LiteralTypeCase.TIME => + val timeBuilder = proto.DataType.Time.newBuilder() + if (literal.getTime.hasPrecision) { + timeBuilder.setPrecision(literal.getTime.getPrecision) + } + builder.setTime(timeBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.ARRAY => + if (literal.getArray.hasElementType) { + builder.setArray( + proto.DataType.Array + .newBuilder() + .setElementType(literal.getArray.getElementType) + .setContainsNull(true) + .build()) + } else { + throw InvalidPlanInput("Data type information is missing in the array literal.") + } + case proto.Expression.Literal.LiteralTypeCase.MAP => + if (literal.getMap.hasKeyType && literal.getMap.hasValueType) { + builder.setMap( + proto.DataType.Map + .newBuilder() + .setKeyType(literal.getMap.getKeyType) + .setValueType(literal.getMap.getValueType) + .setValueContainsNull(true) + .build()) + } else { + throw InvalidPlanInput("Data type information is missing in the map literal.") + } + case proto.Expression.Literal.LiteralTypeCase.STRUCT => + if (literal.getStruct.hasStructType) { + builder.setStruct(literal.getStruct.getStructType.getStruct) + } else { + throw InvalidPlanInput("Data type information is missing in the struct literal.") + } + case _ => + throw InvalidPlanInput( + s"Unsupported Literal Type: ${literal.getLiteralTypeCase.name}" + + s"(${literal.getLiteralTypeCase.getNumber})") } - case _ => - // Not all data types support inferring the data type from the literal at the moment. - // e.g. the type of DayTimeInterval contains extra information like start_field and - // end_field and cannot be inferred from the literal. - return None + builder.build() + } + } + + if (!isCompatible(literal.getLiteralTypeCase, dataType.getKindCase)) { + throw InvalidPlanInput( + s"Incompatible data type ${dataType.getKindCase} " + + s"for literal ${literal.getLiteralTypeCase}") } - Some(builder.build()) + + dataType } private def toScalaArrayInternal( - array: proto.Expression.Literal.Array, + literal: proto.Expression.Literal, arrayType: proto.DataType.Array): Array[_] = { + if (!literal.hasArray) { + throw InvalidPlanInput("Array literal is not set.") + } + val array = literal.getArray def makeArrayData[T](converter: proto.Expression.Literal => T)(implicit tag: ClassTag[T]): Array[T] = { val size = array.getElementsCount @@ -675,32 +619,13 @@ object LiteralValueProtoConverter { makeArrayData(getScalaConverter(arrayType.getElementType)) } - def getProtoArrayType(array: proto.Expression.Literal.Array): proto.DataType.Array = { - if (array.hasDataType) { - val literal = proto.Expression.Literal.newBuilder().setArray(array).build() - getInferredDataType(literal, recursive = true) match { - case Some(dataType) => dataType.getArray - case None => throw InvalidPlanInput("Cannot infer data type from this array literal.") - } - } else if (array.hasElementType) { - // For backward compatibility, we still support the old way to - // define the type of the array. - proto.DataType.Array.newBuilder - .setElementType(array.getElementType) - .setContainsNull(true) - .build() - } else { - throw InvalidPlanInput("Data type information is missing in the array literal.") - } - } - - def toScalaArray(array: proto.Expression.Literal.Array): Array[_] = { - toScalaArrayInternal(array, getProtoArrayType(array)) - } - private def toScalaMapInternal( - map: proto.Expression.Literal.Map, + literal: proto.Expression.Literal, mapType: proto.DataType.Map): mutable.Map[_, _] = { + if (!literal.hasMap) { + throw InvalidPlanInput("Map literal is not set.") + } + val map = literal.getMap def makeMapData[K, V]( keyConverter: proto.Expression.Literal => K, valueConverter: proto.Expression.Literal => V)(implicit @@ -720,33 +645,13 @@ object LiteralValueProtoConverter { makeMapData(getScalaConverter(mapType.getKeyType), getScalaConverter(mapType.getValueType)) } - def getProtoMapType(map: proto.Expression.Literal.Map): proto.DataType.Map = { - if (map.hasDataType) { - val literal = proto.Expression.Literal.newBuilder().setMap(map).build() - getInferredDataType(literal, recursive = true) match { - case Some(dataType) => dataType.getMap - case None => throw InvalidPlanInput("Cannot infer data type from this map literal.") - } - } else if (map.hasKeyType && map.hasValueType) { - // For backward compatibility, we still support the old way to - // define the type of the map. - proto.DataType.Map.newBuilder - .setKeyType(map.getKeyType) - .setValueType(map.getValueType) - .setValueContainsNull(true) - .build() - } else { - throw InvalidPlanInput("Data type information is missing in the map literal.") - } - } - - def toScalaMap(map: proto.Expression.Literal.Map): mutable.Map[_, _] = { - toScalaMapInternal(map, getProtoMapType(map)) - } - private def toScalaStructInternal( - struct: proto.Expression.Literal.Struct, + literal: proto.Expression.Literal, structType: proto.DataType.Struct): Any = { + if (!literal.hasStruct) { + throw InvalidPlanInput("Struct literal is not set.") + } + val struct = literal.getStruct val structData = Array.tabulate(struct.getElementsCount) { i => val element = struct.getElements(i) val dataType = structType.getFields(i).getDataType @@ -755,96 +660,7 @@ object LiteralValueProtoConverter { new GenericRowWithSchema(structData, DataTypeProtoConverter.toCatalystStructType(structType)) } - def getProtoStructType(struct: proto.Expression.Literal.Struct): proto.DataType.Struct = { - if (struct.hasDataTypeStruct) { - val literal = proto.Expression.Literal.newBuilder().setStruct(struct).build() - getInferredDataType(literal, recursive = true) match { - case Some(dataType) => dataType.getStruct - case None => throw InvalidPlanInput("Cannot infer data type from this struct literal.") - } - } else if (struct.hasStructType) { - // For backward compatibility, we still support the old way to - // define and convert struct types. - struct.getStructType.getStruct - } else { - throw InvalidPlanInput("Data type information is missing in the struct literal.") - } - } - - def toScalaStruct(struct: proto.Expression.Literal.Struct): Any = { - toScalaStructInternal(struct, getProtoStructType(struct)) - } - - def getDataType(lit: proto.Expression.Literal): DataType = { - lit.getLiteralTypeCase match { - case proto.Expression.Literal.LiteralTypeCase.NULL => - DataTypeProtoConverter.toCatalystType(lit.getNull) - case proto.Expression.Literal.LiteralTypeCase.BINARY => - BinaryType - case proto.Expression.Literal.LiteralTypeCase.BOOLEAN => - BooleanType - case proto.Expression.Literal.LiteralTypeCase.BYTE => - ByteType - case proto.Expression.Literal.LiteralTypeCase.SHORT => - ShortType - case proto.Expression.Literal.LiteralTypeCase.INTEGER => - IntegerType - case proto.Expression.Literal.LiteralTypeCase.LONG => - LongType - case proto.Expression.Literal.LiteralTypeCase.FLOAT => - FloatType - case proto.Expression.Literal.LiteralTypeCase.DOUBLE => - DoubleType - case proto.Expression.Literal.LiteralTypeCase.DECIMAL => - val decimal = Decimal.apply(lit.getDecimal.getValue) - var precision = decimal.precision - if (lit.getDecimal.hasPrecision) { - precision = math.max(precision, lit.getDecimal.getPrecision) - } - var scale = decimal.scale - if (lit.getDecimal.hasScale) { - scale = math.max(scale, lit.getDecimal.getScale) - } - DecimalType(math.max(precision, scale), scale) - case proto.Expression.Literal.LiteralTypeCase.STRING => - StringType - case proto.Expression.Literal.LiteralTypeCase.DATE => - DateType - case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP => - TimestampType - case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP_NTZ => - TimestampNTZType - case proto.Expression.Literal.LiteralTypeCase.CALENDAR_INTERVAL => - CalendarIntervalType - case proto.Expression.Literal.LiteralTypeCase.YEAR_MONTH_INTERVAL => - YearMonthIntervalType() - case proto.Expression.Literal.LiteralTypeCase.DAY_TIME_INTERVAL => - DayTimeIntervalType() - case proto.Expression.Literal.LiteralTypeCase.TIME => - var precision = TimeType.DEFAULT_PRECISION - if (lit.getTime.hasPrecision) { - precision = lit.getTime.getPrecision - } - TimeType(precision) - case proto.Expression.Literal.LiteralTypeCase.ARRAY => - DataTypeProtoConverter.toCatalystType( - proto.DataType.newBuilder - .setArray(LiteralValueProtoConverter.getProtoArrayType(lit.getArray)) - .build()) - case proto.Expression.Literal.LiteralTypeCase.MAP => - DataTypeProtoConverter.toCatalystType( - proto.DataType.newBuilder - .setMap(LiteralValueProtoConverter.getProtoMapType(lit.getMap)) - .build()) - case proto.Expression.Literal.LiteralTypeCase.STRUCT => - DataTypeProtoConverter.toCatalystType( - proto.DataType.newBuilder - .setStruct(LiteralValueProtoConverter.getProtoStructType(lit.getStruct)) - .build()) - case _ => - throw InvalidPlanInput( - s"Unsupported Literal Type: ${lit.getLiteralTypeCase.name}" + - s"(${lit.getLiteralTypeCase.getNumber})") - } + def getDataType(literal: proto.Expression.Literal): DataType = { + DataTypeProtoConverter.toCatalystType(getProtoDataType(literal)) } } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lag.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lag.json index 53d57913dd3a..89b9968dd33d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_lag.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lag.json @@ -63,6 +63,10 @@ "null": { "null": { } + }, + "dataType": { + "null": { + } } }, "common": { diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lag.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lag.proto.bin index c8030a0979c6..872188c946fd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lag.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lag.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lit.json index a899c9f410aa..176aab1deda6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_lit.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lit.json @@ -362,8 +362,14 @@ "integer": 8 }, { "integer": 6 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "integer": { + } + }, "containsNull": true } } @@ -390,6 +396,10 @@ "null": { "null": { } + }, + "dataType": { + "null": { + } } }, "common": { diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin index 26c7b3a7dc02..6a296702f064 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.json index 153478ce75bb..65902ad604b4 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.json @@ -14,7 +14,9 @@ "expressions": [{ "literal": { "array": { - "dataType": { + }, + "dataType": { + "array": { "elementType": { "double": { } @@ -47,10 +49,7 @@ "array": { "elements": [{ "integer": 1 - }], - "dataType": { - "containsNull": true - } + }] } }, { "array": { @@ -64,8 +63,19 @@ "integer": 3 }] } - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "array": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } + }, "containsNull": true } } @@ -96,15 +106,9 @@ "array": { "elements": [{ "integer": 1 - }], - "dataType": { - "containsNull": true - } + }] } - }], - "dataType": { - "containsNull": true - } + }] } }, { "array": { @@ -126,8 +130,24 @@ } }] } - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "array": { + "elementType": { + "array": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } + }, + "containsNull": true + } + }, "containsNull": true } } @@ -156,8 +176,14 @@ "boolean": true }, { "boolean": false - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "boolean": { + } + }, "containsNull": true } } @@ -209,8 +235,14 @@ "short": 9873 }, { "short": 9874 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "short": { + } + }, "containsNull": true } } @@ -241,8 +273,14 @@ "integer": 8726532 }, { "integer": -8726533 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "integer": { + } + }, "containsNull": true } } @@ -273,8 +311,14 @@ "long": "7834609328726532" }, { "long": "7834609328726533" - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "long": { + } + }, "containsNull": true } } @@ -305,8 +349,14 @@ "double": 1.0 }, { "double": 2.0 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "double": { + } + }, "containsNull": true } } @@ -337,8 +387,14 @@ "float": -0.7 }, { "float": -0.9 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "float": { + } + }, "containsNull": true } } @@ -375,8 +431,10 @@ "precision": 7, "scale": 5 } - }], - "dataType": { + }] + }, + "dataType": { + "array": { "elementType": { "decimal": { "scale": 18, @@ -419,8 +477,10 @@ "precision": 12, "scale": 7 } - }], - "dataType": { + }] + }, + "dataType": { + "array": { "elementType": { "decimal": { "scale": 18, @@ -455,8 +515,10 @@ "string": "connect!" }, { "string": "disconnect!" - }], - "dataType": { + }] + }, + "dataType": { + "array": { "elementType": { "string": { "collation": "UTF8_BINARY" @@ -511,8 +573,10 @@ "string": "ABCDEFGHIJ" }, { "string": "BCDEFGHIJK" - }], - "dataType": { + }] + }, + "dataType": { + "array": { "elementType": { "string": { "collation": "UTF8_BINARY" @@ -546,8 +610,14 @@ "date": 18545 }, { "date": 18546 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "date": { + } + }, "containsNull": true } } @@ -576,8 +646,14 @@ "timestamp": "1677155519808000" }, { "timestamp": "1677155519809000" - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "timestamp": { + } + }, "containsNull": true } } @@ -606,8 +682,14 @@ "timestamp": "12345000" }, { "timestamp": "23456000" - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "timestamp": { + } + }, "containsNull": true } } @@ -636,8 +718,14 @@ "timestampNtz": "1677184560000000" }, { "timestampNtz": "1677188160000000" - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "timestampNtz": { + } + }, "containsNull": true } } @@ -666,8 +754,14 @@ "date": 19411 }, { "date": 19417 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "date": { + } + }, "containsNull": true } } @@ -696,8 +790,10 @@ "dayTimeInterval": "100000000" }, { "dayTimeInterval": "200000000" - }], - "dataType": { + }] + }, + "dataType": { + "array": { "elementType": { "dayTimeInterval": { "startField": 0, @@ -732,8 +828,10 @@ "yearMonthInterval": 0 }, { "yearMonthInterval": 0 - }], - "dataType": { + }] + }, + "dataType": { + "array": { "elementType": { "yearMonthInterval": { "startField": 0, @@ -776,8 +874,14 @@ "days": 21, "microseconds": "200" } - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "calendarInterval": { + } + }, "containsNull": true } } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin index d9edb4100b0d..7e2b7c3bf999 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json index 1b989d402ee4..41ca771596ef 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json @@ -77,6 +77,10 @@ }, { "literal": { "null": { + "null": { + } + }, + "dataType": { "string": { "collation": "UTF8_BINARY" } @@ -408,8 +412,14 @@ "integer": 8 }, { "integer": 6 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "integer": { + } + } } } }, @@ -435,6 +445,10 @@ "null": { "null": { } + }, + "dataType": { + "null": { + } } }, "common": { @@ -703,22 +717,44 @@ "days": 20, "microseconds": "100" } - }], - "dataTypeStruct": { + }] + }, + "dataType": { + "struct": { "fields": [{ "name": "_1", + "dataType": { + "date": { + } + }, "nullable": true }, { "name": "_2", + "dataType": { + "timestamp": { + } + }, "nullable": true }, { "name": "_3", + "dataType": { + "timestamp": { + } + }, "nullable": true }, { "name": "_4", + "dataType": { + "timestampNtz": { + } + }, "nullable": true }, { "name": "_5", + "dataType": { + "date": { + } + }, "nullable": true }, { "name": "_6", @@ -748,6 +784,10 @@ "nullable": true }, { "name": "_9", + "dataType": { + "calendarInterval": { + } + }, "nullable": true }] } @@ -800,8 +840,14 @@ "integer": 2 }, { "integer": 3 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "integer": { + } + } } } }, @@ -827,7 +873,7 @@ "array": { "elements": [{ "null": { - "integer": { + "null": { } } }, { @@ -835,8 +881,14 @@ "null": { } } - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "integer": { + } + }, "containsNull": true } } @@ -863,22 +915,7 @@ "array": { "elements": [{ "null": { - "struct": { - "fields": [{ - "name": "_1", - "dataType": { - "integer": { - } - } - }, { - "name": "_2", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }] + "null": { } } }, { @@ -892,20 +929,7 @@ "integer": 1 }, { "string": "a" - }], - "dataTypeStruct": { - "fields": [{ - "name": "_1" - }, { - "name": "_2", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }] - } + }] } }, { "struct": { @@ -913,22 +937,34 @@ "integer": 2 }, { "null": { - "string": { - "collation": "UTF8_BINARY" + "null": { } } - }], - "dataTypeStruct": { + }] + } + }] + }, + "dataType": { + "array": { + "elementType": { + "struct": { "fields": [{ - "name": "_1" + "name": "_1", + "dataType": { + "integer": { + } + } }, { "name": "_2", + "dataType": { + "string": { + "collation": "UTF8_BINARY" + } + }, "nullable": true }] } - } - }], - "dataType": { + }, "containsNull": true } } @@ -955,22 +991,7 @@ "array": { "elements": [{ "null": { - "struct": { - "fields": [{ - "name": "_1", - "dataType": { - "integer": { - } - } - }, { - "name": "_2", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }] + "null": { } } }, { @@ -984,10 +1005,20 @@ "integer": 1 }, { "string": "a" - }], - "dataTypeStruct": { + }] + } + }] + }, + "dataType": { + "array": { + "elementType": { + "struct": { "fields": [{ - "name": "_1" + "name": "_1", + "dataType": { + "integer": { + } + } }, { "name": "_2", "dataType": { @@ -998,9 +1029,7 @@ "nullable": true }] } - } - }], - "dataType": { + }, "containsNull": true } } @@ -1031,8 +1060,14 @@ "integer": 2 }, { "integer": 3 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "integer": { + } + } } } }, @@ -1065,12 +1100,18 @@ "integer": 1 }, { "integer": 2 - }], - "dataType": { + }] + }, + "dataType": { + "map": { "keyType": { "string": { "collation": "UTF8_BINARY" } + }, + "valueType": { + "integer": { + } } } } @@ -1102,7 +1143,7 @@ }], "values": [{ "null": { - "integer": { + "null": { } } }, { @@ -1110,13 +1151,19 @@ "null": { } } - }], - "dataType": { + }] + }, + "dataType": { + "map": { "keyType": { "string": { "collation": "UTF8_BINARY" } }, + "valueType": { + "integer": { + } + }, "valueContainsNull": true } } @@ -1147,8 +1194,10 @@ "integer": 2 }, { "double": 1.0 - }], - "dataTypeStruct": { + }] + }, + "dataType": { + "struct": { "fields": [{ "name": "_1", "dataType": { @@ -1158,9 +1207,17 @@ }, "nullable": true }, { - "name": "_2" + "name": "_2", + "dataType": { + "integer": { + } + } }, { - "name": "_3" + "name": "_3", + "dataType": { + "double": { + } + } }] } } @@ -1185,6 +1242,10 @@ }, { "literal": { "null": { + "null": { + } + }, + "dataType": { "integer": { } } @@ -1211,8 +1272,14 @@ "array": { "elements": [{ "integer": 1 - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "integer": { + } + }, "containsNull": true } } @@ -1242,11 +1309,21 @@ }], "values": [{ "null": { - "integer": { + "null": { } } - }], - "dataType": { + }] + }, + "dataType": { + "map": { + "keyType": { + "integer": { + } + }, + "valueType": { + "integer": { + } + }, "valueContainsNull": true } } @@ -1276,11 +1353,21 @@ }], "values": [{ "null": { - "integer": { + "null": { } } - }], - "dataType": { + }] + }, + "dataType": { + "map": { + "keyType": { + "integer": { + } + }, + "valueType": { + "integer": { + } + }, "valueContainsNull": true } } @@ -1310,11 +1397,21 @@ }], "values": [{ "null": { - "integer": { + "null": { } } - }], - "dataType": { + }] + }, + "dataType": { + "map": { + "keyType": { + "integer": { + } + }, + "valueType": { + "integer": { + } + }, "valueContainsNull": true } } @@ -1347,9 +1444,7 @@ "integer": 2 }, { "integer": 3 - }], - "dataType": { - } + }] } }, { "array": { @@ -1371,8 +1466,18 @@ "integer": 9 }] } - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "array": { + "elementType": { + "integer": { + } + } + } + }, "containsNull": true } } @@ -1409,33 +1514,9 @@ "string": "3" }, { "string": "4" - }], - "dataType": { - "elementType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "containsNull": true - } + }] } - }], - "dataTypeStruct": { - "fields": [{ - "name": "_1" - }, { - "name": "_2", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }, { - "name": "_3", - "nullable": true - }] - } + }] } }, { "struct": { @@ -1445,19 +1526,21 @@ "string": "6" }, { "array": { - "dataType": { - "elementType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "containsNull": true - } } - }], - "dataTypeStruct": { + }] + } + }] + }, + "dataType": { + "array": { + "elementType": { + "struct": { "fields": [{ - "name": "_1" + "name": "_1", + "dataType": { + "integer": { + } + } }, { "name": "_2", "dataType": { @@ -1468,12 +1551,20 @@ "nullable": true }, { "name": "_3", + "dataType": { + "array": { + "elementType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "containsNull": true + } + }, "nullable": true }] } - } - }], - "dataType": { + }, "containsNull": true } } @@ -1504,20 +1595,7 @@ "integer": 1 }, { "string": "2" - }], - "dataTypeStruct": { - "fields": [{ - "name": "a" - }, { - "name": "b", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }] - } + }] } }, { "struct": { @@ -1525,20 +1603,7 @@ "integer": 3 }, { "string": "4" - }], - "dataTypeStruct": { - "fields": [{ - "name": "a" - }, { - "name": "b", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }] - } + }] } }, { "struct": { @@ -1546,10 +1611,20 @@ "integer": 5 }, { "string": "6" - }], - "dataTypeStruct": { + }] + } + }] + }, + "dataType": { + "array": { + "elementType": { + "struct": { "fields": [{ - "name": "a" + "name": "a", + "dataType": { + "integer": { + } + } }, { "name": "b", "dataType": { @@ -1560,9 +1635,7 @@ "nullable": true }] } - } - }], - "dataType": { + }, "containsNull": true } } @@ -1598,14 +1671,7 @@ "integer": 1 }, { "integer": 2 - }], - "dataType": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - } - } + }] } }, { "map": { @@ -1633,8 +1699,23 @@ "integer": 6 }] } - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "map": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } + }, "containsNull": true } } @@ -1672,15 +1753,7 @@ "string": "1" }, { "string": "2" - }], - "dataType": { - "elementType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "containsNull": true - } + }] } }, { "array": { @@ -1690,15 +1763,7 @@ "string": "4" }] } - }], - "dataType": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueContainsNull": true - } + }] } }, { "map": { @@ -1740,8 +1805,30 @@ } }] } - }], - "dataType": { + }] + }, + "dataType": { + "array": { + "elementType": { + "map": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "array": { + "elementType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "containsNull": true + } + }, + "valueContainsNull": true + } + }, "containsNull": true } } @@ -1782,14 +1869,7 @@ "integer": 1 }, { "integer": 2 - }], - "dataType": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - } - } + }] } }, { "map": { @@ -1804,8 +1884,27 @@ "integer": 4 }] } - }], - "dataType": { + }] + }, + "dataType": { + "map": { + "keyType": { + "integer": { + } + }, + "valueType": { + "map": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } + }, "valueContainsNull": true } } @@ -1838,9 +1937,7 @@ "integer": 2 }, { "integer": 3 - }], - "dataType": { - } + }] } }, { "map": { @@ -1853,14 +1950,7 @@ "integer": 1 }, { "integer": 2 - }], - "dataType": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - } - } + }] } }, { "struct": { @@ -1877,42 +1967,73 @@ "string": "a" }, { "string": "b" - }], - "dataType": { - "valueType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueContainsNull": true - } + }] } - }], - "dataTypeStruct": { - "fields": [{ - "name": "_1", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }, { - "name": "_2", - "nullable": true - }] - } + }] } - }], - "dataTypeStruct": { + }] + }, + "dataType": { + "struct": { "fields": [{ "name": "_1", + "dataType": { + "array": { + "elementType": { + "integer": { + } + } + } + }, "nullable": true }, { "name": "_2", + "dataType": { + "map": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } + }, "nullable": true }, { "name": "_3", + "dataType": { + "struct": { + "fields": [{ + "name": "_1", + "dataType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "nullable": true + }, { + "name": "_2", + "dataType": { + "map": { + "keyType": { + "integer": { + } + }, + "valueType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueContainsNull": true + } + }, + "nullable": true + }] + } + }, "nullable": true }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin index 734f8576d24e..5068b513a927 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin differ diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/Serializer.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/Serializer.scala index 6e01090f8087..6863818d00ef 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/Serializer.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/Serializer.scala @@ -165,15 +165,16 @@ private[ml] object Serializer { case proto.Expression.Literal.LiteralTypeCase.BOOLEAN => (literal.getBoolean.asInstanceOf[Object], classOf[Boolean]) case proto.Expression.Literal.LiteralTypeCase.ARRAY => - val scalaArray = LiteralValueProtoConverter.toScalaArray(literal.getArray) - val arrayType = LiteralValueProtoConverter.getProtoArrayType(literal.getArray) - arrayType.getElementType.getKindCase match { + val scalaArray = + LiteralValueProtoConverter.toScalaValue(literal).asInstanceOf[Array[_]] + val dataType = LiteralValueProtoConverter.getProtoDataType(literal) + dataType.getArray.getElementType.getKindCase match { case proto.DataType.KindCase.DOUBLE => (MLUtils.reconcileArray(classOf[Double], scalaArray), classOf[Array[Double]]) case proto.DataType.KindCase.STRING => (MLUtils.reconcileArray(classOf[String], scalaArray), classOf[Array[String]]) case proto.DataType.KindCase.ARRAY => - arrayType.getElementType.getArray.getElementType.getKindCase match { + dataType.getArray.getElementType.getArray.getElementType.getKindCase match { case proto.DataType.KindCase.STRING => ( MLUtils.reconcileArray(classOf[Array[String]], scalaArray), diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala index 9a2827cf8b55..3eab2560bcc1 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala @@ -124,7 +124,7 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i Seq(1, 2, 3), Some(ArrayType(IntegerType, containsNull = false)), ToLiteralProtoOptions(useDeprecatedDataTypeFields = true)) - assert(!literalProto.getArray.hasDataType) + assert(!literalProto.hasDataType) assert(literalProto.getArray.getElementsList.size == 3) assert(literalProto.getArray.getElementType.hasInteger) @@ -147,7 +147,7 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i Map[String, Int]("a" -> 1, "b" -> 2), Some(MapType(StringType, IntegerType, valueContainsNull = false)), ToLiteralProtoOptions(useDeprecatedDataTypeFields = true)) - assert(!literalProto.getMap.hasDataType) + assert(!literalProto.hasDataType) assert(literalProto.getMap.getKeysList.size == 2) assert(literalProto.getMap.getValuesList.size == 2) assert(literalProto.getMap.getKeyType.hasString) @@ -180,7 +180,7 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i StructField("a", IntegerType, nullable = true), StructField("b", StringType, nullable = false)))), ToLiteralProtoOptions(useDeprecatedDataTypeFields = true)) - assert(!structProto.getStruct.hasDataTypeStruct) + assert(!structProto.hasDataType) assert(structProto.getStruct.getElementsList.size == 2) val structTypeProto = structProto.getStruct.getStructType.getStruct assert(structTypeProto.getFieldsList.size == 2) @@ -189,8 +189,8 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i assert(structTypeProto.getFieldsList.get(1).getName == "b") assert(structTypeProto.getFieldsList.get(1).getDataType.hasString) - val result = LiteralValueProtoConverter.toScalaStruct(structProto.getStruct) - val resultType = LiteralValueProtoConverter.getProtoStructType(structProto.getStruct) + val result = LiteralValueProtoConverter.toScalaValue(structProto) + val resultType = LiteralValueProtoConverter.getProtoDataType(structProto) // Verify the result is a GenericRowWithSchema with correct values assert(result.isInstanceOf[GenericRowWithSchema]) @@ -200,105 +200,15 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i assert(row.get(1) == "test") // Verify the returned struct type matches the original - assert(resultType.getFieldsCount == 2) - assert(resultType.getFields(0).getName == "a") - assert(resultType.getFields(0).getDataType.hasInteger) - assert(resultType.getFields(0).getNullable) - assert(resultType.getFields(1).getName == "b") - assert(resultType.getFields(1).getDataType.hasString) - assert(!resultType.getFields(1).getNullable) - } - - test("data types of struct fields are not set for inferable types") { - val literalProto = toLiteralProto( - (1, 2.0, true, (1, 2)), - StructType( - Seq( - StructField("a", IntegerType), - StructField("b", DoubleType), - StructField("c", BooleanType), - StructField( - "d", - StructType(Seq(StructField("e", IntegerType), StructField("f", IntegerType))))))) - assert(!literalProto.getStruct.getDataTypeStruct.getFieldsList.get(0).hasDataType) - assert(!literalProto.getStruct.getDataTypeStruct.getFieldsList.get(1).hasDataType) - assert(!literalProto.getStruct.getDataTypeStruct.getFieldsList.get(2).hasDataType) - assert(!literalProto.getStruct.getDataTypeStruct.getFieldsList.get(3).hasDataType) - } - - test("data types of struct fields are set for non-inferable types") { - val literalProto = toLiteralProto( - ("string", Decimal(1)), - StructType(Seq(StructField("a", StringType), StructField("b", DecimalType(10, 2))))) - assert(literalProto.getStruct.getDataTypeStruct.getFieldsList.get(0).hasDataType) - assert(literalProto.getStruct.getDataTypeStruct.getFieldsList.get(1).hasDataType) - } - - test("nullable and metadata fields are set for struct literal proto") { - val literalProto = toLiteralProto( - ("string", Decimal(1)), - StructType(Seq( - StructField("a", StringType, nullable = true, Metadata.fromJson("""{"key": "value"}""")), - StructField("b", DecimalType(10, 2), nullable = false)))) - val structFields = literalProto.getStruct.getDataTypeStruct.getFieldsList - assert(structFields.get(0).getNullable) - assert(structFields.get(0).hasMetadata) - assert(structFields.get(0).getMetadata == """{"key":"value"}""") - assert(!structFields.get(1).getNullable) - assert(!structFields.get(1).hasMetadata) - - val structTypeProto = LiteralValueProtoConverter.getProtoStructType(literalProto.getStruct) - assert(structTypeProto.getFieldsList.get(0).getNullable) - assert(structTypeProto.getFieldsList.get(0).hasMetadata) - assert(structTypeProto.getFieldsList.get(0).getMetadata == """{"key":"value"}""") - assert(!structTypeProto.getFieldsList.get(1).getNullable) - assert(!structTypeProto.getFieldsList.get(1).hasMetadata) - } - - test("element type of array literal is set for an empty array") { - val literalProto = - toLiteralProto(Array[Int](), ArrayType(IntegerType)) - assert(literalProto.getArray.getDataType.hasElementType) - } - - test("element type of array literal is set for a non-empty array with non-inferable type") { - val literalProto = toLiteralProto(Array[String]("1", "2", "3"), ArrayType(StringType)) - assert(literalProto.getArray.getDataType.hasElementType) - } - - test("element type of array literal is not set for a non-empty array with inferable type") { - val literalProto = - toLiteralProto(Array(1, 2, 3), ArrayType(IntegerType)) - assert(!literalProto.getArray.getDataType.hasElementType) - } - - test("key and value type of map literal are set for an empty map") { - val literalProto = toLiteralProto(Map[Int, Int](), MapType(IntegerType, IntegerType)) - assert(literalProto.getMap.getDataType.hasKeyType) - assert(literalProto.getMap.getDataType.hasValueType) - } - - test("key type of map literal is set for a non-empty map with non-inferable key type") { - val literalProto = toLiteralProto( - Map[String, Int]("1" -> 1, "2" -> 2, "3" -> 3), - MapType(StringType, IntegerType)) - assert(literalProto.getMap.getDataType.hasKeyType) - assert(!literalProto.getMap.getDataType.hasValueType) - } - - test("value type of map literal is set for a non-empty map with non-inferable value type") { - val literalProto = toLiteralProto( - Map[Int, String](1 -> "1", 2 -> "2", 3 -> "3"), - MapType(IntegerType, StringType)) - assert(!literalProto.getMap.getDataType.hasKeyType) - assert(literalProto.getMap.getDataType.hasValueType) - } - - test("key and value type of map literal are not set for a non-empty map with inferable types") { - val literalProto = - toLiteralProto(Map(1 -> 2, 3 -> 4, 5 -> 6), MapType(IntegerType, IntegerType)) - assert(!literalProto.getMap.getDataType.hasKeyType) - assert(!literalProto.getMap.getDataType.hasValueType) + assert(resultType.getKindCase == proto.DataType.KindCase.STRUCT) + val structType = resultType.getStruct + assert(structType.getFieldsCount == 2) + assert(structType.getFields(0).getName == "a") + assert(structType.getFields(0).getDataType.hasInteger) + assert(structType.getFields(0).getNullable) + assert(structType.getFields(1).getName == "b") + assert(structType.getFields(1).getDataType.hasString) + assert(!structType.getFields(1).getNullable) } test("an invalid array literal") {