diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.py b/python/pyspark/sql/connect/proto/expressions_pb2.py index fe83e78ce657..3ce27567f1e0 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.py +++ b/python/pyspark/sql/connect/proto/expressions_pb2.py @@ -40,7 +40,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\xcc\x36\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12g\n\x1atyped_aggregate_expression\x18\x14 \x01(\x0b\x32\'.spark.connect.TypedAggregateExpressionH\x00R\x18typedAggregateExpression\x12T\n\x13subquery_expression\x18\x15 \x01(\x0b\x32!.spark.connect.SubqueryExpressionH\x00R\x12subqueryExpression\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 
\x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9a\x11\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x12\x61\n\x11specialized_array\x18\x19 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.SpecializedArrayH\x00R\x10specializedArray\x12<\n\x04time\x18\x1a 
\x01(\x0b\x32&.spark.connect.Expression.Literal.TimeH\x00R\x04time\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\xcf\x01\n\x06Struct\x12<\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x12H\n\x10\x64\x61ta_type_struct\x18\x03 \x01(\x0b\x32\x1e.spark.connect.DataType.StructR\x0e\x64\x61taTypeStruct\x1a\xc0\x02\n\x10SpecializedArray\x12,\n\x05\x62ools\x18\x01 \x01(\x0b\x32\x14.spark.connect.BoolsH\x00R\x05\x62ools\x12)\n\x04ints\x18\x02 \x01(\x0b\x32\x13.spark.connect.IntsH\x00R\x04ints\x12,\n\x05longs\x18\x03 \x01(\x0b\x32\x14.spark.connect.LongsH\x00R\x05longs\x12/\n\x06\x66loats\x18\x04 \x01(\x0b\x32\x15.spark.connect.FloatsH\x00R\x06\x66loats\x12\x32\n\x07\x64oubles\x18\x05 \x01(\x0b\x32\x16.spark.connect.DoublesH\x00R\x07\x64oubles\x12\x32\n\x07strings\x18\x06 \x01(\x0b\x32\x16.spark.connect.StringsH\x00R\x07stringsB\x0c\n\nvalue_type\x1aK\n\x04Time\x12\x12\n\x04nano\x18\x01 \x01(\x03R\x04nano\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x42\x0c\n\n_precisionB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\x82\x02\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x12$\n\x0bis_internal\x18\x05 \x01(\x08H\x00R\nisInternal\x88\x01\x01\x42\x0e\n\x0c_is_internal\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 
\x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\x8d\x03\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdf\x12\x1f\n\x0bis_distinct\x18\x07 \x01(\x08R\nisDistinctB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"c\n\x18TypedAggregateExpression\x12G\n\x10scalar_scala_udf\x18\x01 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFR\x0escalarScalaUdf"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 
\x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_condition"\xc5\x05\n\x12SubqueryExpression\x12\x17\n\x07plan_id\x18\x01 \x01(\x03R\x06planId\x12S\n\rsubquery_type\x18\x02 \x01(\x0e\x32..spark.connect.SubqueryExpression.SubqueryTypeR\x0csubqueryType\x12\x62\n\x11table_arg_options\x18\x03 \x01(\x0b\x32\x31.spark.connect.SubqueryExpression.TableArgOptionsH\x00R\x0ftableArgOptions\x88\x01\x01\x12G\n\x12in_subquery_values\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x10inSubqueryValues\x1a\xea\x01\n\x0fTableArgOptions\x12@\n\x0epartition_spec\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12\x37\n\x15with_single_partition\x18\x03 \x01(\x08H\x00R\x13withSinglePartition\x88\x01\x01\x42\x18\n\x16_with_single_partition"\x90\x01\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x12\x1b\n\x17SUBQUERY_TYPE_TABLE_ARG\x10\x03\x12\x14\n\x10SUBQUERY_TYPE_IN\x10\x04\x42\x14\n\x12_table_arg_optionsB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\xce\x37\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f 
\x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12g\n\x1atyped_aggregate_expression\x18\x14 \x01(\x0b\x32\'.spark.connect.TypedAggregateExpressionH\x00R\x18typedAggregateExpression\x12T\n\x13subquery_expression\x18\x15 \x01(\x0b\x32!.spark.connect.SubqueryExpressionH\x00R\x12subqueryExpression\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9c\x12\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 
\x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x12\x61\n\x11specialized_array\x18\x19 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.SpecializedArrayH\x00R\x10specializedArray\x12<\n\x04time\x18\x1a \x01(\x0b\x32&.spark.connect.Expression.Literal.TimeH\x00R\x04time\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\xc2\x01\n\x05\x41rray\x12>\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x12:\n\tdata_type\x18\x03 \x01(\x0b\x32\x1d.spark.connect.DataType.ArrayR\x08\x64\x61taType\x1a\xa5\x02\n\x03Map\x12\x36\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\x07keyType\x12:\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x38\n\tdata_type\x18\x05 \x01(\x0b\x32\x1b.spark.connect.DataType.MapR\x08\x64\x61taType\x1a\xcf\x01\n\x06Struct\x12<\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x12H\n\x10\x64\x61ta_type_struct\x18\x03 \x01(\x0b\x32\x1e.spark.connect.DataType.StructR\x0e\x64\x61taTypeStruct\x1a\xc0\x02\n\x10SpecializedArray\x12,\n\x05\x62ools\x18\x01 \x01(\x0b\x32\x14.spark.connect.BoolsH\x00R\x05\x62ools\x12)\n\x04ints\x18\x02 \x01(\x0b\x32\x13.spark.connect.IntsH\x00R\x04ints\x12,\n\x05longs\x18\x03 \x01(\x0b\x32\x14.spark.connect.LongsH\x00R\x05longs\x12/\n\x06\x66loats\x18\x04 \x01(\x0b\x32\x15.spark.connect.FloatsH\x00R\x06\x66loats\x12\x32\n\x07\x64oubles\x18\x05 \x01(\x0b\x32\x16.spark.connect.DoublesH\x00R\x07\x64oubles\x12\x32\n\x07strings\x18\x06 \x01(\x0b\x32\x16.spark.connect.StringsH\x00R\x07stringsB\x0c\n\nvalue_type\x1aK\n\x04Time\x12\x12\n\x04nano\x18\x01 \x01(\x03R\x04nano\x12!\n\tprecision\x18\x02 
\x01(\x05H\x00R\tprecision\x88\x01\x01\x42\x0c\n\n_precisionB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\x82\x02\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x12$\n\x0bis_internal\x18\x05 \x01(\x08H\x00R\nisInternal\x88\x01\x01\x42\x0e\n\x0c_is_internal\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\x8d\x03\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdf\x12\x1f\n\x0bis_distinct\x18\x07 \x01(\x08R\nisDistinctB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 
\x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"c\n\x18TypedAggregateExpression\x12G\n\x10scalar_scala_udf\x18\x01 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFR\x0escalarScalaUdf"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_condition"\xc5\x05\n\x12SubqueryExpression\x12\x17\n\x07plan_id\x18\x01 \x01(\x03R\x06planId\x12S\n\rsubquery_type\x18\x02 \x01(\x0e\x32..spark.connect.SubqueryExpression.SubqueryTypeR\x0csubqueryType\x12\x62\n\x11table_arg_options\x18\x03 \x01(\x0b\x32\x31.spark.connect.SubqueryExpression.TableArgOptionsH\x00R\x0ftableArgOptions\x88\x01\x01\x12G\n\x12in_subquery_values\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x10inSubqueryValues\x1a\xea\x01\n\x0fTableArgOptions\x12@\n\x0epartition_spec\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12\x37\n\x15with_single_partition\x18\x03 \x01(\x08H\x00R\x13withSinglePartition\x88\x01\x01\x42\x18\n\x16_with_single_partition"\x90\x01\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x12\x1b\n\x17SUBQUERY_TYPE_TABLE_ARG\x10\x03\x12\x14\n\x10SUBQUERY_TYPE_IN\x10\x04\x42\x14\n\x12_table_arg_optionsB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _globals = globals() @@ -53,12 +53,22 @@ _globals[ "DESCRIPTOR" ]._serialized_options = b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" + _globals["_EXPRESSION_LITERAL_ARRAY"].fields_by_name["element_type"]._loaded_options = None + _globals["_EXPRESSION_LITERAL_ARRAY"].fields_by_name[ + "element_type" + ]._serialized_options = b"\030\001" + _globals["_EXPRESSION_LITERAL_MAP"].fields_by_name["key_type"]._loaded_options = None + _globals["_EXPRESSION_LITERAL_MAP"].fields_by_name["key_type"]._serialized_options = b"\030\001" + _globals["_EXPRESSION_LITERAL_MAP"].fields_by_name["value_type"]._loaded_options = None + 
_globals["_EXPRESSION_LITERAL_MAP"].fields_by_name[ + "value_type" + ]._serialized_options = b"\030\001" _globals["_EXPRESSION_LITERAL_STRUCT"].fields_by_name["struct_type"]._loaded_options = None _globals["_EXPRESSION_LITERAL_STRUCT"].fields_by_name[ "struct_type" ]._serialized_options = b"\030\001" _globals["_EXPRESSION"]._serialized_start = 133 - _globals["_EXPRESSION"]._serialized_end = 7121 + _globals["_EXPRESSION"]._serialized_end = 7251 _globals["_EXPRESSION_WINDOW"]._serialized_start = 1986 _globals["_EXPRESSION_WINDOW"]._serialized_end = 2769 _globals["_EXPRESSION_WINDOW_WINDOWFRAME"]._serialized_start = 2276 @@ -78,67 +88,67 @@ _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_start = 3401 _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_end = 3499 _globals["_EXPRESSION_LITERAL"]._serialized_start = 3518 - _globals["_EXPRESSION_LITERAL"]._serialized_end = 5720 + _globals["_EXPRESSION_LITERAL"]._serialized_end = 5850 _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_start = 4514 _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_end = 4631 _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_start = 4633 _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_end = 4731 _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_start = 4734 - _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_end = 4864 - _globals["_EXPRESSION_LITERAL_MAP"]._serialized_start = 4867 - _globals["_EXPRESSION_LITERAL_MAP"]._serialized_end = 5094 - _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_start = 5097 - _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_end = 5304 - _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_start = 5307 - _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_end = 5627 - _globals["_EXPRESSION_LITERAL_TIME"]._serialized_start = 5629 - _globals["_EXPRESSION_LITERAL_TIME"]._serialized_end = 5704 - _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_start = 5723 - _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_end = 5909 - _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_start = 5912 - _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_end = 6170 - _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_start = 6172 - _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_end = 6222 - _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_start = 6224 - _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_end = 6348 - _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_start = 6350 - _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_end = 6436 - _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_start = 6439 - _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_end = 6571 - _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_start = 6574 - _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_end = 6761 - _globals["_EXPRESSION_ALIAS"]._serialized_start = 6763 - _globals["_EXPRESSION_ALIAS"]._serialized_end = 6883 - _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_start = 6886 - _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_end = 7044 - _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_start = 7046 - _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_end = 7108 - _globals["_EXPRESSIONCOMMON"]._serialized_start = 7123 - _globals["_EXPRESSIONCOMMON"]._serialized_end = 7188 - _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_start = 7191 - _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_end = 7588 - _globals["_PYTHONUDF"]._serialized_start = 7591 - 
_globals["_PYTHONUDF"]._serialized_end = 7795 - _globals["_SCALARSCALAUDF"]._serialized_start = 7798 - _globals["_SCALARSCALAUDF"]._serialized_end = 8012 - _globals["_JAVAUDF"]._serialized_start = 8015 - _globals["_JAVAUDF"]._serialized_end = 8164 - _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_start = 8166 - _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_end = 8265 - _globals["_CALLFUNCTION"]._serialized_start = 8267 - _globals["_CALLFUNCTION"]._serialized_end = 8375 - _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_start = 8377 - _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_end = 8469 - _globals["_MERGEACTION"]._serialized_start = 8472 - _globals["_MERGEACTION"]._serialized_end = 8984 - _globals["_MERGEACTION_ASSIGNMENT"]._serialized_start = 8694 - _globals["_MERGEACTION_ASSIGNMENT"]._serialized_end = 8800 - _globals["_MERGEACTION_ACTIONTYPE"]._serialized_start = 8803 - _globals["_MERGEACTION_ACTIONTYPE"]._serialized_end = 8970 - _globals["_SUBQUERYEXPRESSION"]._serialized_start = 8987 - _globals["_SUBQUERYEXPRESSION"]._serialized_end = 9696 - _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_start = 9293 - _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_end = 9527 - _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_start = 9530 - _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_end = 9674 + _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_end = 4928 + _globals["_EXPRESSION_LITERAL_MAP"]._serialized_start = 4931 + _globals["_EXPRESSION_LITERAL_MAP"]._serialized_end = 5224 + _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_start = 5227 + _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_end = 5434 + _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_start = 5437 + _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_end = 5757 + _globals["_EXPRESSION_LITERAL_TIME"]._serialized_start = 5759 + _globals["_EXPRESSION_LITERAL_TIME"]._serialized_end = 5834 + _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_start = 5853 + _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_end = 6039 + _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_start = 6042 + _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_end = 6300 + _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_start = 6302 + _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_end = 6352 + _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_start = 6354 + _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_end = 6478 + _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_start = 6480 + _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_end = 6566 + _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_start = 6569 + _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_end = 6701 + _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_start = 6704 + _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_end = 6891 + _globals["_EXPRESSION_ALIAS"]._serialized_start = 6893 + _globals["_EXPRESSION_ALIAS"]._serialized_end = 7013 + _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_start = 7016 + _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_end = 7174 + _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_start = 7176 + _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_end = 7238 + _globals["_EXPRESSIONCOMMON"]._serialized_start = 7253 + _globals["_EXPRESSIONCOMMON"]._serialized_end = 7318 + _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_start = 7321 + _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_end = 
7718 + _globals["_PYTHONUDF"]._serialized_start = 7721 + _globals["_PYTHONUDF"]._serialized_end = 7925 + _globals["_SCALARSCALAUDF"]._serialized_start = 7928 + _globals["_SCALARSCALAUDF"]._serialized_end = 8142 + _globals["_JAVAUDF"]._serialized_start = 8145 + _globals["_JAVAUDF"]._serialized_end = 8294 + _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_start = 8296 + _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_end = 8395 + _globals["_CALLFUNCTION"]._serialized_start = 8397 + _globals["_CALLFUNCTION"]._serialized_end = 8505 + _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_start = 8507 + _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_end = 8599 + _globals["_MERGEACTION"]._serialized_start = 8602 + _globals["_MERGEACTION"]._serialized_end = 9114 + _globals["_MERGEACTION_ASSIGNMENT"]._serialized_start = 8824 + _globals["_MERGEACTION_ASSIGNMENT"]._serialized_end = 8930 + _globals["_MERGEACTION_ACTIONTYPE"]._serialized_start = 8933 + _globals["_MERGEACTION_ACTIONTYPE"]._serialized_end = 9100 + _globals["_SUBQUERYEXPRESSION"]._serialized_start = 9117 + _globals["_SUBQUERYEXPRESSION"]._serialized_end = 9826 + _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_start = 9423 + _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_end = 9657 + _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_start = 9660 + _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_end = 9804 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.pyi b/python/pyspark/sql/connect/proto/expressions_pb2.pyi index ad347fd4bd15..508a11a01c85 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.pyi +++ b/python/pyspark/sql/connect/proto/expressions_pb2.pyi @@ -474,27 +474,51 @@ class Expression(google.protobuf.message.Message): ELEMENT_TYPE_FIELD_NUMBER: builtins.int ELEMENTS_FIELD_NUMBER: builtins.int + DATA_TYPE_FIELD_NUMBER: builtins.int @property - def element_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: ... + def element_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: + """(Deprecated) The element type of the array. + + This field is deprecated since Spark 4.1+ and should only be set + if the data_type field is not set. Use data_type field instead. + """ @property def elements( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___Expression.Literal - ]: ... + ]: + """The literal values that make up the array elements.""" + @property + def data_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Array: + """The type of the array. + + If the element type can be inferred from the first element of the elements field, + then you don't need to set data_type.element_type to save space. On the other hand, + redundant type information is also acceptable. + """ def __init__( self, *, element_type: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., elements: collections.abc.Iterable[global___Expression.Literal] | None = ..., + data_type: pyspark.sql.connect.proto.types_pb2.DataType.Array | None = ..., ) -> None: ... def HasField( - self, field_name: typing_extensions.Literal["element_type", b"element_type"] + self, + field_name: typing_extensions.Literal[ + "data_type", b"data_type", "element_type", b"element_type" + ], ) -> builtins.bool: ... 
def ClearField( self, field_name: typing_extensions.Literal[ - "element_type", b"element_type", "elements", b"elements" + "data_type", + b"data_type", + "element_type", + b"element_type", + "elements", + b"elements", ], ) -> None: ... @@ -505,22 +529,43 @@ class Expression(google.protobuf.message.Message): VALUE_TYPE_FIELD_NUMBER: builtins.int KEYS_FIELD_NUMBER: builtins.int VALUES_FIELD_NUMBER: builtins.int + DATA_TYPE_FIELD_NUMBER: builtins.int @property - def key_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: ... + def key_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: + """(Deprecated) The key type of the map. + + This field is deprecated since Spark 4.1+ and should only be set + if the data_type field is not set. Use data_type field instead. + """ @property - def value_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: ... + def value_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: + """(Deprecated) The value type of the map. + + This field is deprecated since Spark 4.1+ and should only be set + if the data_type field is not set. Use data_type field instead. + """ @property def keys( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___Expression.Literal - ]: ... + ]: + """The literal keys that make up the map.""" @property def values( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___Expression.Literal - ]: ... + ]: + """The literal values that make up the map.""" + @property + def data_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Map: + """The type of the map. + + If the key/value types can be inferred from the first element of the keys/values fields, + then you don't need to set data_type.key_type/data_type.value_type to save space. + On the other hand, redundant type information is also acceptable. + """ def __init__( self, *, @@ -528,16 +573,19 @@ class Expression(google.protobuf.message.Message): value_type: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., keys: collections.abc.Iterable[global___Expression.Literal] | None = ..., values: collections.abc.Iterable[global___Expression.Literal] | None = ..., + data_type: pyspark.sql.connect.proto.types_pb2.DataType.Map | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ - "key_type", b"key_type", "value_type", b"value_type" + "data_type", b"data_type", "key_type", b"key_type", "value_type", b"value_type" ], ) -> builtins.bool: ... 
def ClearField( self, field_name: typing_extensions.Literal[ + "data_type", + b"data_type", "key_type", b"key_type", "keys", diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala index 24a1d091af2f..29dd7d9742f4 100644 --- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala @@ -1687,6 +1687,35 @@ class ClientE2ETestSuite assert(df.count() == 100) } } + + test("SPARK-52930: the nullability of arrays should be preserved using typedlit") { + val arrays = Seq( + (typedlit(Array[Int]()), false), + (typedlit(Array[Int](1)), false), + (typedlit(Array[Integer]()), true), + (typedlit(Array[Integer](1)), true)) + for ((array, containsNull) <- arrays) { + val df = spark.sql("select 1").select(array) + df.createOrReplaceTempView("test_array_nullability") + val schema = spark.sql("select * from test_array_nullability").schema + assert(schema.fields.head.dataType.asInstanceOf[ArrayType].containsNull === containsNull) + } + } + + test("SPARK-52930: the nullability of map values should be preserved using typedlit") { + val maps = Seq( + (typedlit(Map[String, Int]()), false), + (typedlit(Map[String, Int]("a" -> 1)), false), + (typedlit(Map[String, Integer]()), true), + (typedlit(Map[String, Integer]("a" -> 1)), true)) + for ((map, valueContainsNull) <- maps) { + val df = spark.sql("select 1").select(map) + df.createOrReplaceTempView("test_map_nullability") + val schema = spark.sql("select * from test_map_nullability").schema + assert( + schema.fields.head.dataType.asInstanceOf[MapType].valueContainsNull === valueContainsNull) + } + } } private[sql] case class ClassData(a: String, b: Int) diff --git a/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto b/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto index 3ae6cb8dba9b..913622b91a28 100644 --- a/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto +++ b/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto @@ -215,15 +215,48 @@ message Expression { } message Array { - DataType element_type = 1; + // (Deprecated) The element type of the array. + // + // This field is deprecated since Spark 4.1+ and should only be set + // if the data_type field is not set. Use data_type field instead. + DataType element_type = 1 [deprecated = true]; + + // The literal values that make up the array elements. repeated Literal elements = 2; + + // The type of the array. + // + // If the element type can be inferred from the first element of the elements field, + // then you don't need to set data_type.element_type to save space. On the other hand, + // redundant type information is also acceptable. + DataType.Array data_type = 3; } message Map { - DataType key_type = 1; - DataType value_type = 2; + // (Deprecated) The key type of the map. + // + // This field is deprecated since Spark 4.1+ and should only be set + // if the data_type field is not set. Use data_type field instead. + DataType key_type = 1 [deprecated = true]; + + // (Deprecated) The value type of the map. + // + // This field is deprecated since Spark 4.1+ and should only be set + // if the data_type field is not set. Use data_type field instead. + DataType value_type = 2 [deprecated = true]; + + // The literal keys that make up the map. 
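To make the new wire shape concrete, here is a minimal, illustrative sketch (not part of the patch) of the literal a client could produce for typedlit(Array[Integer](1)) once data_type is used; it relies only on the generated protobuf builder API for the messages above, so treat names outside the patch as assumptions:

    import org.apache.spark.connect.proto

    // Sketch: an Array literal carrying the new data_type field; the deprecated
    // element_type field (tag 1) is deliberately left unset.
    val elementType = proto.DataType
      .newBuilder()
      .setInteger(proto.DataType.Integer.newBuilder())
      .build()
    val literal = proto.Expression.Literal
      .newBuilder()
      .setArray(
        proto.Expression.Literal.Array
          .newBuilder()
          .setDataType(
            proto.DataType.Array
              .newBuilder()
              .setElementType(elementType)
              .setContainsNull(true)) // Array[Integer] may contain nulls
          .addElements(proto.Expression.Literal.newBuilder().setInteger(1)))
      .build()

Per the comments in the schema above, data_type.element_type could also be omitted here, since the element type is inferable from elements(0); redundant type information is equally acceptable.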
diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/columnNodeSupport.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/columnNodeSupport.scala
index 1e798387726b..cbbec0599b77 100644
--- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/columnNodeSupport.scala
+++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/columnNodeSupport.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder
 import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
 import org.apache.spark.sql.connect.ConnectConversions._
 import org.apache.spark.sql.connect.common.DataTypeProtoConverter
-import org.apache.spark.sql.connect.common.LiteralValueProtoConverter.toLiteralProtoBuilder
+import org.apache.spark.sql.connect.common.LiteralValueProtoConverter.{toLiteralProtoBuilderWithOptions, ToLiteralProtoOptions}
 import org.apache.spark.sql.expressions.{Aggregator, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction}
 import org.apache.spark.sql.internal.{Alias, CaseWhenOtherwise, Cast, ColumnNode, ColumnNodeLike, InvokeInlineUserDefinedFunction, LambdaFunction, LazyExpression, Literal, SortOrder, SqlExpression, SubqueryExpression, SubqueryType, UnresolvedAttribute, UnresolvedExtractValue, UnresolvedFunction, UnresolvedNamedLambdaVariable, UnresolvedRegex, UnresolvedStar, UpdateFields, Window, WindowFrame}
@@ -65,11 +65,12 @@ object ColumnNodeToProtoConverter extends (ColumnNode => proto.Expression) {
     val builder = proto.Expression.newBuilder()
     val n = additionalTransformation.map(_(node)).getOrElse(node)
     n match {
-      case Literal(value, None, _) =>
-        builder.setLiteral(toLiteralProtoBuilder(value))
-
-      case Literal(value, Some(dataType), _) =>
-        builder.setLiteral(toLiteralProtoBuilder(value, dataType))
+      case Literal(value, dataTypeOpt, _) =>
+        builder.setLiteral(
+          toLiteralProtoBuilderWithOptions(
+            value,
+            dataTypeOpt,
+            ToLiteralProtoOptions(useDeprecatedDataTypeFields = false)))

       case u @ UnresolvedAttribute(unparsedIdentifier, planId, isMetadataColumn, _) =>
         val escapedName = u.sql
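For context on the collapsed match above: the typed and untyped literal paths now flow through a single entry point, with the options flag selecting the old or new wire fields. A minimal sketch of the call shape, using only the types this patch introduces (the sample values are hypothetical):

    import org.apache.spark.sql.connect.common.LiteralValueProtoConverter.{toLiteralProtoBuilderWithOptions, ToLiteralProtoOptions}
    import org.apache.spark.sql.types.{ArrayType, IntegerType}

    // Some(dataType) takes the typed path, so containsNull = false is recorded in
    // array.data_type rather than being lost; None falls back to class-based inference.
    val lit = toLiteralProtoBuilderWithOptions(
      Array(1, 2, 3),
      Some(ArrayType(IntegerType, containsNull = false)),
      ToLiteralProtoOptions(useDeprecatedDataTypeFields = false)).build()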
- * - * @return - * proto.Expression.Literal.Builder - */ @scala.annotation.tailrec - def toLiteralProtoBuilder(literal: Any): proto.Expression.Literal.Builder = { + private def toLiteralProtoBuilderInternal( + literal: Any, + options: ToLiteralProtoOptions): proto.Expression.Literal.Builder = { val builder = proto.Expression.Literal.newBuilder() def decimalBuilder(precision: Int, scale: Int, value: String) = { @@ -63,8 +59,17 @@ object LiteralValueProtoConverter { def arrayBuilder(array: Array[_]) = { val ab = builder.getArrayBuilder - .setElementType(toConnectProtoType(toDataType(array.getClass.getComponentType))) - array.foreach(x => ab.addElements(toLiteralProto(x))) + if (options.useDeprecatedDataTypeFields) { + ab.setElementType(toConnectProtoType(toDataType(array.getClass.getComponentType))) + } else { + ab.setDataType( + proto.DataType.Array + .newBuilder() + .setElementType(toConnectProtoType(toDataType(array.getClass.getComponentType))) + .setContainsNull(true) + .build()) + } + array.foreach(x => ab.addElements(toLiteralProtoWithOptions(x, None, options))) ab } @@ -84,8 +89,8 @@ object LiteralValueProtoConverter { case v: Char => builder.setString(v.toString) case v: Array[Char] => builder.setString(String.valueOf(v)) case v: Array[Byte] => builder.setBinary(ByteString.copyFrom(v)) - case v: mutable.ArraySeq[_] => toLiteralProtoBuilder(v.array) - case v: immutable.ArraySeq[_] => toLiteralProtoBuilder(v.unsafeArray) + case v: mutable.ArraySeq[_] => toLiteralProtoBuilderInternal(v.array, options) + case v: immutable.ArraySeq[_] => toLiteralProtoBuilderInternal(v.unsafeArray, options) case v: LocalDate => builder.setDate(v.toEpochDay.toInt) case v: Decimal => builder.setDecimal(decimalBuilder(Math.max(v.precision, v.scale), v.scale, v.toString)) @@ -110,19 +115,31 @@ object LiteralValueProtoConverter { } @scala.annotation.tailrec - def toLiteralProtoBuilder( + private def toLiteralProtoBuilderInternal( literal: Any, - dataType: DataType): proto.Expression.Literal.Builder = { + dataType: DataType, + options: ToLiteralProtoOptions): proto.Expression.Literal.Builder = { val builder = proto.Expression.Literal.newBuilder() - def arrayBuilder(scalaValue: Any, elementType: DataType) = { - val ab = builder.getArrayBuilder.setElementType(toConnectProtoType(elementType)) - + def arrayBuilder(scalaValue: Any, elementType: DataType, containsNull: Boolean) = { + val ab = builder.getArrayBuilder + if (options.useDeprecatedDataTypeFields) { + ab.setElementType(toConnectProtoType(elementType)) + } else { + ab.setDataType( + proto.DataType.Array + .newBuilder() + .setElementType(toConnectProtoType(elementType)) + .setContainsNull(containsNull) + .build()) + } scalaValue match { case a: Array[_] => - a.foreach(item => ab.addElements(toLiteralProto(item, elementType))) + a.foreach(item => + ab.addElements(toLiteralProtoWithOptions(item, Some(elementType), options))) case s: scala.collection.Seq[_] => - s.foreach(item => ab.addElements(toLiteralProto(item, elementType))) + s.foreach(item => + ab.addElements(toLiteralProtoWithOptions(item, Some(elementType), options))) case other => throw new IllegalArgumentException(s"literal $other not supported (yet).") } @@ -130,16 +147,30 @@ object LiteralValueProtoConverter { ab } - def mapBuilder(scalaValue: Any, keyType: DataType, valueType: DataType) = { + def mapBuilder( + scalaValue: Any, + keyType: DataType, + valueType: DataType, + valueContainsNull: Boolean) = { val mb = builder.getMapBuilder - .setKeyType(toConnectProtoType(keyType)) - 
.setValueType(toConnectProtoType(valueType)) + if (options.useDeprecatedDataTypeFields) { + mb.setKeyType(toConnectProtoType(keyType)) + mb.setValueType(toConnectProtoType(valueType)) + } else { + mb.setDataType( + proto.DataType.Map + .newBuilder() + .setKeyType(toConnectProtoType(keyType)) + .setValueType(toConnectProtoType(valueType)) + .setValueContainsNull(valueContainsNull) + .build()) + } scalaValue match { case map: scala.collection.Map[_, _] => map.foreach { case (k, v) => - mb.addKeys(toLiteralProto(k, keyType)) - mb.addValues(toLiteralProto(v, valueType)) + mb.addKeys(toLiteralProtoWithOptions(k, Some(keyType), options)) + mb.addValues(toLiteralProtoWithOptions(v, Some(valueType), options)) } case other => throw new IllegalArgumentException(s"literal $other not supported (yet).") @@ -155,30 +186,42 @@ object LiteralValueProtoConverter { scalaValue match { case p: Product => val iter = p.productIterator - val dataTypeStruct = proto.DataType.Struct.newBuilder() var idx = 0 - while (idx < structType.size) { - val field = fields(idx) - val literalProto = toLiteralProto(iter.next(), field.dataType) - sb.addElements(literalProto) - - val fieldBuilder = dataTypeStruct - .addFieldsBuilder() - .setName(field.name) - .setNullable(field.nullable) - - if (LiteralValueProtoConverter.getInferredDataType(literalProto).isEmpty) { - fieldBuilder.setDataType(toConnectProtoType(field.dataType)) + if (options.useDeprecatedDataTypeFields) { + while (idx < structType.size) { + val field = fields(idx) + val literalProto = + toLiteralProtoWithOptions(iter.next(), Some(field.dataType), options) + sb.addElements(literalProto) + idx += 1 } + sb.setStructType(toConnectProtoType(structType)) + } else { + val dataTypeStruct = proto.DataType.Struct.newBuilder() + while (idx < structType.size) { + val field = fields(idx) + val literalProto = + toLiteralProtoWithOptions(iter.next(), Some(field.dataType), options) + sb.addElements(literalProto) + + val fieldBuilder = dataTypeStruct + .addFieldsBuilder() + .setName(field.name) + .setNullable(field.nullable) + + if (LiteralValueProtoConverter.getInferredDataType(literalProto).isEmpty) { + fieldBuilder.setDataType(toConnectProtoType(field.dataType)) + } - // Set metadata if available - if (field.metadata != Metadata.empty) { - fieldBuilder.setMetadata(field.metadata.json) - } + // Set metadata if available + if (field.metadata != Metadata.empty) { + fieldBuilder.setMetadata(field.metadata.json) + } - idx += 1 + idx += 1 + } + sb.setDataTypeStruct(dataTypeStruct.build()) } - sb.setDataTypeStruct(dataTypeStruct.build()) case other => throw new IllegalArgumentException(s"literal $other not supported (yet).") } @@ -188,20 +231,20 @@ object LiteralValueProtoConverter { (literal, dataType) match { case (v: mutable.ArraySeq[_], ArrayType(_, _)) => - toLiteralProtoBuilder(v.array, dataType) + toLiteralProtoBuilderInternal(v.array, dataType, options) case (v: immutable.ArraySeq[_], ArrayType(_, _)) => - toLiteralProtoBuilder(v.unsafeArray, dataType) + toLiteralProtoBuilderInternal(v.unsafeArray, dataType, options) case (v: Array[Byte], ArrayType(_, _)) => - toLiteralProtoBuilder(v) - case (v, ArrayType(elementType, _)) => - builder.setArray(arrayBuilder(v, elementType)) - case (v, MapType(keyType, valueType, _)) => - builder.setMap(mapBuilder(v, keyType, valueType)) + toLiteralProtoBuilderInternal(v, options) + case (v, ArrayType(elementType, containsNull)) => + builder.setArray(arrayBuilder(v, elementType, containsNull)) + case (v, MapType(keyType, valueType, 
@@ -188,20 +231,20 @@ object LiteralValueProtoConverter {
 
     (literal, dataType) match {
       case (v: mutable.ArraySeq[_], ArrayType(_, _)) =>
-        toLiteralProtoBuilder(v.array, dataType)
+        toLiteralProtoBuilderInternal(v.array, dataType, options)
       case (v: immutable.ArraySeq[_], ArrayType(_, _)) =>
-        toLiteralProtoBuilder(v.unsafeArray, dataType)
+        toLiteralProtoBuilderInternal(v.unsafeArray, dataType, options)
       case (v: Array[Byte], ArrayType(_, _)) =>
-        toLiteralProtoBuilder(v)
-      case (v, ArrayType(elementType, _)) =>
-        builder.setArray(arrayBuilder(v, elementType))
-      case (v, MapType(keyType, valueType, _)) =>
-        builder.setMap(mapBuilder(v, keyType, valueType))
+        toLiteralProtoBuilderInternal(v, options)
+      case (v, ArrayType(elementType, containsNull)) =>
+        builder.setArray(arrayBuilder(v, elementType, containsNull))
+      case (v, MapType(keyType, valueType, valueContainsNull)) =>
+        builder.setMap(mapBuilder(v, keyType, valueType, valueContainsNull))
       case (v, structType: StructType) =>
         builder.setStruct(structBuilder(v, structType))
       case (v: Option[_], _: DataType) =>
         if (v.isDefined) {
-          toLiteralProtoBuilder(v.get)
+          toLiteralProtoBuilderInternal(v.get, options)
         } else {
           builder.setNull(toConnectProtoType(dataType))
         }
@@ -210,7 +253,41 @@ object LiteralValueProtoConverter {
         builder.getTimeBuilder
           .setNano(SparkDateTimeUtils.localTimeToNanos(v))
           .setPrecision(timeType.precision))
-      case _ => toLiteralProtoBuilder(literal)
+      case _ => toLiteralProtoBuilderInternal(literal, options)
+    }
+
+  }
+
+  /**
+   * Transforms literal value to the `proto.Expression.Literal.Builder`.
+   *
+   * @return
+   *   proto.Expression.Literal.Builder
+   */
+  def toLiteralProtoBuilder(literal: Any): proto.Expression.Literal.Builder = {
+    toLiteralProtoBuilderInternal(
+      literal,
+      ToLiteralProtoOptions(useDeprecatedDataTypeFields = true))
+  }
+
+  def toLiteralProtoBuilder(
+      literal: Any,
+      dataType: DataType): proto.Expression.Literal.Builder = {
+    toLiteralProtoBuilderInternal(
+      literal,
+      dataType,
+      ToLiteralProtoOptions(useDeprecatedDataTypeFields = true))
+  }
+
+  def toLiteralProtoBuilderWithOptions(
+      literal: Any,
+      dataTypeOpt: Option[DataType],
+      options: ToLiteralProtoOptions): proto.Expression.Literal.Builder = {
+    dataTypeOpt match {
+      case Some(dataType) =>
+        toLiteralProtoBuilderInternal(literal, dataType, options)
+      case None =>
+        toLiteralProtoBuilderInternal(literal, options)
     }
   }
@@ -221,6 +298,8 @@ object LiteralValueProtoConverter {
     toLiteralProtoBuilder(v)
   }
 
+  case class ToLiteralProtoOptions(useDeprecatedDataTypeFields: Boolean)
+
   /**
    * Transforms literal value to the `proto.Expression.Literal`.
    *
@@ -228,10 +307,27 @@ object LiteralValueProtoConverter {
   * proto.Expression.Literal
   */
  def toLiteralProto(literal: Any): proto.Expression.Literal =
-    toLiteralProtoBuilder(literal).build()
+    toLiteralProtoBuilderInternal(
+      literal,
+      ToLiteralProtoOptions(useDeprecatedDataTypeFields = true)).build()
 
  def toLiteralProto(literal: Any, dataType: DataType): proto.Expression.Literal =
-    toLiteralProtoBuilder(literal, dataType).build()
+    toLiteralProtoBuilderInternal(
+      literal,
+      dataType,
+      ToLiteralProtoOptions(useDeprecatedDataTypeFields = true)).build()
+
+  def toLiteralProtoWithOptions(
+      literal: Any,
+      dataTypeOpt: Option[DataType],
+      options: ToLiteralProtoOptions): proto.Expression.Literal = {
+    dataTypeOpt match {
+      case Some(dataType) =>
+        toLiteralProtoBuilderInternal(literal, dataType, options).build()
+      case None =>
+        toLiteralProtoBuilderInternal(literal, options).build()
+    }
+  }
 
   private[sql] def toDataType(clz: Class[_]): DataType = clz match {
     // primitive types
@@ -349,12 +445,14 @@ object LiteralValueProtoConverter {
         v =>
           val interval = v.getCalendarInterval
           new CalendarInterval(interval.getMonths, interval.getDays, interval.getMicroseconds)
-      case proto.DataType.KindCase.ARRAY => v => toCatalystArray(v.getArray)
-      case proto.DataType.KindCase.MAP => v => toCatalystMap(v.getMap)
+      case proto.DataType.KindCase.ARRAY =>
+        v => toCatalystArrayInternal(v.getArray, dataType.getArray)
+      case proto.DataType.KindCase.MAP =>
+        v => toCatalystMapInternal(v.getMap, dataType.getMap)
       case proto.DataType.KindCase.STRUCT =>
         v => toCatalystStructInternal(v.getStruct, dataType.getStruct)
       case _ =>
-        throw InvalidPlanInput(s"Unsupported Literal Type: $dataType)")
+        throw InvalidPlanInput(s"Unsupported Literal Type: ${dataType.getKindCase}")
     }
   }
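
Usage sketch for the wrappers added above (reviewer illustration; only entry points that appear in this diff are used): the public `toLiteralProtoBuilder`/`toLiteralProto` overloads deliberately pin `useDeprecatedDataTypeFields = true`, so every existing caller keeps its old wire format, and the new encoding is strictly opt-in through the `WithOptions` variants.

    import org.apache.spark.sql.connect.common.LiteralValueProtoConverter
    import org.apache.spark.sql.connect.common.LiteralValueProtoConverter.ToLiteralProtoOptions

    // Unchanged behavior for existing callers: deprecated fields on the wire.
    val legacy = LiteralValueProtoConverter.toLiteralProto(Array(1, 2, 3))

    // Explicit opt-in to the new data_type-based encoding.
    val modern = LiteralValueProtoConverter.toLiteralProtoWithOptions(
      Array(1, 2, 3),
      None,
      ToLiteralProtoOptions(useDeprecatedDataTypeFields = false))
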
@@ -417,6 +515,59 @@ object LiteralValueProtoConverter {
       } else {
         builder.setStruct(proto.DataType.Struct.newBuilder.build())
       }
+      case proto.Expression.Literal.LiteralTypeCase.ARRAY =>
+        if (recursive) {
+          val arrayType = literal.getArray.getDataType
+          val elementTypeOpt = if (arrayType.hasElementType) {
+            Some(arrayType.getElementType)
+          } else if (literal.getArray.getElementsCount > 0) {
+            getInferredDataType(literal.getArray.getElements(0), recursive = true)
+          } else {
+            None
+          }
+          if (elementTypeOpt.isDefined) {
+            builder.setArray(
+              proto.DataType.Array
+                .newBuilder()
+                .setElementType(elementTypeOpt.get)
+                .setContainsNull(arrayType.getContainsNull)
+                .build())
+          } else {
+            return None
+          }
+        } else {
+          builder.setArray(proto.DataType.Array.newBuilder.build())
+        }
+      case proto.Expression.Literal.LiteralTypeCase.MAP =>
+        if (recursive) {
+          val mapType = literal.getMap.getDataType
+          val keyTypeOpt = if (mapType.hasKeyType) {
+            Some(mapType.getKeyType)
+          } else if (literal.getMap.getKeysCount > 0) {
+            getInferredDataType(literal.getMap.getKeys(0), recursive = true)
+          } else {
+            None
+          }
+          val valueTypeOpt = if (mapType.hasValueType) {
+            Some(mapType.getValueType)
+          } else if (literal.getMap.getValuesCount > 0) {
+            getInferredDataType(literal.getMap.getValues(0), recursive = true)
+          } else {
+            None
+          }
+          if (keyTypeOpt.isDefined && valueTypeOpt.isDefined) {
+            builder.setMap(
+              proto.DataType.Map.newBuilder
+                .setKeyType(keyTypeOpt.get)
+                .setValueType(valueTypeOpt.get)
+                .setValueContainsNull(mapType.getValueContainsNull)
+                .build())
+          } else {
+            return None
+          }
+        } else {
+          builder.setMap(proto.DataType.Map.newBuilder.build())
+        }
       case _ =>
         // Not all data types support inferring the data type from the literal at the moment.
         // e.g. the type of DayTimeInterval contains extra information like start_field and
@@ -426,7 +577,9 @@ object LiteralValueProtoConverter {
     Some(builder.build())
   }
 
-  def toCatalystArray(array: proto.Expression.Literal.Array): Array[_] = {
+  private def toCatalystArrayInternal(
+      array: proto.Expression.Literal.Array,
+      arrayType: proto.DataType.Array): Array[_] = {
     def makeArrayData[T](converter: proto.Expression.Literal => T)(implicit
         tag: ClassTag[T]): Array[T] = {
       val size = array.getElementsCount
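
The resolution order the new ARRAY and MAP cases implement, restated as a sketch (reviewer illustration; `getProtoArrayType`, referenced below, is defined a few hunks further down in this diff):

    // For an array literal, the element type is resolved in this order:
    //   1. data_type.element_type, when the client sent the new field;
    //   2. otherwise, the type recursively inferred from elements(0);
    //   3. otherwise None - e.g. an empty array carrying no type at all -
    //      which getProtoArrayType later surfaces as an InvalidPlanInput.
    val emptyUntyped = proto.Expression.Literal.Array.newBuilder().build()
    // LiteralValueProtoConverter.getProtoArrayType(emptyUntyped)  // would throw
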
@@ -437,10 +590,35 @@ object LiteralValueProtoConverter {
       }
     }
 
-    makeArrayData(getConverter(array.getElementType))
+    makeArrayData(getConverter(arrayType.getElementType))
   }
 
-  def toCatalystMap(map: proto.Expression.Literal.Map): mutable.Map[_, _] = {
+  def getProtoArrayType(array: proto.Expression.Literal.Array): proto.DataType.Array = {
+    if (array.hasDataType) {
+      val literal = proto.Expression.Literal.newBuilder().setArray(array).build()
+      getInferredDataType(literal, recursive = true) match {
+        case Some(dataType) => dataType.getArray
+        case None => throw InvalidPlanInput("Cannot infer data type from this array literal.")
+      }
+    } else if (array.hasElementType) {
+      // For backward compatibility, we still support the old way to
+      // define the type of the array.
+      proto.DataType.Array.newBuilder
+        .setElementType(array.getElementType)
+        .setContainsNull(true)
+        .build()
+    } else {
+      throw InvalidPlanInput("Data type information is missing in the array literal.")
+    }
+  }
+
+  def toCatalystArray(array: proto.Expression.Literal.Array): Array[_] = {
+    toCatalystArrayInternal(array, getProtoArrayType(array))
+  }
+
+  private def toCatalystMapInternal(
+      map: proto.Expression.Literal.Map,
+      mapType: proto.DataType.Map): mutable.Map[_, _] = {
     def makeMapData[K, V](
         keyConverter: proto.Expression.Literal => K,
         valueConverter: proto.Expression.Literal => V)(implicit
@@ -457,7 +635,31 @@ object LiteralValueProtoConverter {
       }
     }
 
-    makeMapData(getConverter(map.getKeyType), getConverter(map.getValueType))
+    makeMapData(getConverter(mapType.getKeyType), getConverter(mapType.getValueType))
+  }
+
+  def getProtoMapType(map: proto.Expression.Literal.Map): proto.DataType.Map = {
+    if (map.hasDataType) {
+      val literal = proto.Expression.Literal.newBuilder().setMap(map).build()
+      getInferredDataType(literal, recursive = true) match {
+        case Some(dataType) => dataType.getMap
+        case None => throw InvalidPlanInput("Cannot infer data type from this map literal.")
+      }
+    } else if (map.hasKeyType && map.hasValueType) {
+      // For backward compatibility, we still support the old way to
+      // define the type of the map.
+      proto.DataType.Map.newBuilder
+        .setKeyType(map.getKeyType)
+        .setValueType(map.getValueType)
+        .setValueContainsNull(true)
+        .build()
+    } else {
+      throw InvalidPlanInput("Data type information is missing in the map literal.")
+    }
+  }
+
+  def toCatalystMap(map: proto.Expression.Literal.Map): mutable.Map[_, _] = {
+    toCatalystMapInternal(map, getProtoMapType(map))
   }
 
   private def toCatalystStructInternal(
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lit.json
index 57a17148abe4..cedf7572a1fd 100644
--- a/sql/connect/common/src/test/resources/query-tests/queries/function_lit.json
+++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lit.json
@@ -358,15 +358,18 @@
   }, {
     "literal": {
       "array": {
-        "elementType": {
-          "integer": {
-          }
-        },
         "elements": [{
           "integer": 8
         }, {
           "integer": 6
-        }]
+        }],
+        "dataType": {
+          "elementType": {
+            "integer": {
+            }
+          },
+          "containsNull": true
+        }
       }
     },
     "common": {
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin
index 6e4ec0e2ffef..5d30f4fca159 100644
Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin differ
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.json
index 337b3366649f..53b1a7b3947f 100644
--- a/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.json
+++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.json
@@ -14,9 +14,12 @@
   "expressions": [{
     "literal": {
       "array": {
-        "elementType": {
-          "double": {
-          }
+        "dataType": {
+          "elementType": {
+            "double": {
+            }
+          },
+          "containsNull": true
         }
       }
     },
@@ -40,46 +43,58 @@
   }, {
     "literal": {
       "array": {
-        "elementType": {
-          "array": {
-            "elementType": {
-              "integer": {
-              }
-            },
-            "containsNull": true
-          }
-        },
         "elements": [{
           "array": {
- "elementType": { - "integer": { - } - }, "elements": [{ "integer": 1 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } } }, { "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 2 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } } }, { "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 3 - }] - } - }] + }], + "dataType": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } + } + }], + "dataType": { + "elementType": { + "array": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } + }, + "containsNull": true + } } }, "common": { @@ -102,90 +117,111 @@ }, { "literal": { "array": { - "elementType": { - "array": { - "elementType": { - "array": { - "elementType": { - "integer": { - } - }, - "containsNull": true - } - }, - "containsNull": true - } - }, "elements": [{ "array": { - "elementType": { - "array": { - "elementType": { - "integer": { - } - }, - "containsNull": true - } - }, "elements": [{ "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 1 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } } - }] + }], + "dataType": { + "elementType": { + "array": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } + }, + "containsNull": true + } } }, { "array": { - "elementType": { - "array": { - "elementType": { - "integer": { - } - }, - "containsNull": true - } - }, "elements": [{ "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 2 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } } - }] + }], + "dataType": { + "elementType": { + "array": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } + }, + "containsNull": true + } } }, { "array": { - "elementType": { - "array": { - "elementType": { - "integer": { - } - }, - "containsNull": true - } - }, "elements": [{ "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 3 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } } - }] - } - }] + }], + "dataType": { + "elementType": { + "array": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } + }, + "containsNull": true + } + } + }], + "dataType": { + "elementType": { + "array": { + "elementType": { + "array": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } + }, + "containsNull": true + } + }, + "containsNull": true + } } }, "common": { @@ -208,15 +244,18 @@ }, { "literal": { "array": { - "elementType": { - "boolean": { - } - }, "elements": [{ "boolean": true }, { "boolean": false - }] + }], + "dataType": { + "elementType": { + "boolean": { + } + }, + "containsNull": true + } } }, "common": { @@ -260,17 +299,20 @@ }, { "literal": { "array": { - "elementType": { - "short": { - } - }, "elements": [{ "short": 9872 }, { "short": 9873 }, { "short": 9874 - }] + }], + "dataType": { + "elementType": { + "short": { + } + }, + "containsNull": true + } } }, "common": { @@ -293,17 +335,20 @@ }, { "literal": { "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": -8726532 }, { "integer": 8726532 }, { "integer": -8726533 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } } }, "common": { @@ -326,17 +371,20 @@ }, { "literal": { "array": { - "elementType": { - 
"long": { - } - }, "elements": [{ "long": "7834609328726531" }, { "long": "7834609328726532" }, { "long": "7834609328726533" - }] + }], + "dataType": { + "elementType": { + "long": { + } + }, + "containsNull": true + } } }, "common": { @@ -359,17 +407,20 @@ }, { "literal": { "array": { - "elementType": { - "double": { - } - }, "elements": [{ "double": 2.718281828459045 }, { "double": 1.0 }, { "double": 2.0 - }] + }], + "dataType": { + "elementType": { + "double": { + } + }, + "containsNull": true + } } }, "common": { @@ -392,17 +443,20 @@ }, { "literal": { "array": { - "elementType": { - "float": { - } - }, "elements": [{ "float": -0.8 }, { "float": -0.7 }, { "float": -0.9 - }] + }], + "dataType": { + "elementType": { + "float": { + } + }, + "containsNull": true + } } }, "common": { @@ -425,12 +479,6 @@ }, { "literal": { "array": { - "elementType": { - "decimal": { - "scale": 18, - "precision": 38 - } - }, "elements": [{ "decimal": { "value": "89.97620", @@ -443,7 +491,16 @@ "precision": 7, "scale": 5 } - }] + }], + "dataType": { + "elementType": { + "decimal": { + "scale": 18, + "precision": 38 + } + }, + "containsNull": true + } } }, "common": { @@ -466,12 +523,6 @@ }, { "literal": { "array": { - "elementType": { - "decimal": { - "scale": 18, - "precision": 38 - } - }, "elements": [{ "decimal": { "value": "89889.7667231", @@ -484,7 +535,16 @@ "precision": 12, "scale": 7 } - }] + }], + "dataType": { + "elementType": { + "decimal": { + "scale": 18, + "precision": 38 + } + }, + "containsNull": true + } } }, "common": { @@ -507,16 +567,19 @@ }, { "literal": { "array": { - "elementType": { - "string": { - "collation": "UTF8_BINARY" - } - }, "elements": [{ "string": "connect!" }, { "string": "disconnect!" - }] + }], + "dataType": { + "elementType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "containsNull": true + } } }, "common": { @@ -560,16 +623,19 @@ }, { "literal": { "array": { - "elementType": { - "string": { - "collation": "UTF8_BINARY" - } - }, "elements": [{ "string": "ABCDEFGHIJ" }, { "string": "BCDEFGHIJK" - }] + }], + "dataType": { + "elementType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "containsNull": true + } } }, "common": { @@ -592,15 +658,18 @@ }, { "literal": { "array": { - "elementType": { - "date": { - } - }, "elements": [{ "date": 18545 }, { "date": 18546 - }] + }], + "dataType": { + "elementType": { + "date": { + } + }, + "containsNull": true + } } }, "common": { @@ -623,15 +692,18 @@ }, { "literal": { "array": { - "elementType": { - "timestamp": { - } - }, "elements": [{ "timestamp": "1677155519808000" }, { "timestamp": "1677155519809000" - }] + }], + "dataType": { + "elementType": { + "timestamp": { + } + }, + "containsNull": true + } } }, "common": { @@ -654,15 +726,18 @@ }, { "literal": { "array": { - "elementType": { - "timestamp": { - } - }, "elements": [{ "timestamp": "12345000" }, { "timestamp": "23456000" - }] + }], + "dataType": { + "elementType": { + "timestamp": { + } + }, + "containsNull": true + } } }, "common": { @@ -685,15 +760,18 @@ }, { "literal": { "array": { - "elementType": { - "timestampNtz": { - } - }, "elements": [{ "timestampNtz": "1677184560000000" }, { "timestampNtz": "1677188160000000" - }] + }], + "dataType": { + "elementType": { + "timestampNtz": { + } + }, + "containsNull": true + } } }, "common": { @@ -716,15 +794,18 @@ }, { "literal": { "array": { - "elementType": { - "date": { - } - }, "elements": [{ "date": 19411 }, { "date": 19417 - }] + }], + "dataType": { + "elementType": { + "date": { + } + }, + 
"containsNull": true + } } }, "common": { @@ -747,17 +828,20 @@ }, { "literal": { "array": { - "elementType": { - "dayTimeInterval": { - "startField": 0, - "endField": 3 - } - }, "elements": [{ "dayTimeInterval": "100000000" }, { "dayTimeInterval": "200000000" - }] + }], + "dataType": { + "elementType": { + "dayTimeInterval": { + "startField": 0, + "endField": 3 + } + }, + "containsNull": true + } } }, "common": { @@ -780,17 +864,20 @@ }, { "literal": { "array": { - "elementType": { - "yearMonthInterval": { - "startField": 0, - "endField": 1 - } - }, "elements": [{ "yearMonthInterval": 0 }, { "yearMonthInterval": 0 - }] + }], + "dataType": { + "elementType": { + "yearMonthInterval": { + "startField": 0, + "endField": 1 + } + }, + "containsNull": true + } } }, "common": { @@ -813,10 +900,6 @@ }, { "literal": { "array": { - "elementType": { - "calendarInterval": { - } - }, "elements": [{ "calendarInterval": { "months": 2, @@ -829,7 +912,14 @@ "days": 21, "microseconds": "200" } - }] + }], + "dataType": { + "elementType": { + "calendarInterval": { + } + }, + "containsNull": true + } } }, "common": { diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin index 320da1025818..8cb965dd25a0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json index bd9d6bb3c8bb..66bf31d670f9 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json @@ -403,15 +403,17 @@ }, { "literal": { "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 8 }, { "integer": 6 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + } + } } }, "common": { @@ -700,17 +702,19 @@ }, { "literal": { "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 1 }, { "integer": 2 }, { "integer": 3 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + } + } } }, "common": { @@ -733,17 +737,19 @@ }, { "literal": { "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 1 }, { "integer": 2 }, { "integer": 3 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + } + } } }, "common": { @@ -766,15 +772,6 @@ }, { "literal": { "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - }, "keys": [{ "string": "a" }, { @@ -784,7 +781,18 @@ "integer": 1 }, { "integer": 2 - }] + }], + "dataType": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } } }, "common": { @@ -875,13 +883,16 @@ }, { "literal": { "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 1 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + }, + "containsNull": true + } } }, "common": { @@ -904,14 +915,6 @@ }, { "literal": { "map": { - "keyType": { - "integer": { - } - }, - "valueType": { - "integer": { - } - }, "keys": [{ "integer": 1 }], @@ -920,7 +923,18 @@ "integer": { } } - }] + }], + "dataType": { + "keyType": { + "integer": { + } + }, + 
"valueType": { + "integer": { + } + }, + "valueContainsNull": true + } } }, "common": { @@ -943,14 +957,6 @@ }, { "literal": { "map": { - "keyType": { - "integer": { - } - }, - "valueType": { - "integer": { - } - }, "keys": [{ "integer": 1 }], @@ -959,7 +965,18 @@ "integer": { } } - }] + }], + "dataType": { + "keyType": { + "integer": { + } + }, + "valueType": { + "integer": { + } + }, + "valueContainsNull": true + } } }, "common": { @@ -982,14 +999,6 @@ }, { "literal": { "map": { - "keyType": { - "integer": { - } - }, - "valueType": { - "integer": { - } - }, "keys": [{ "integer": 1 }], @@ -998,7 +1007,18 @@ "integer": { } } - }] + }], + "dataType": { + "keyType": { + "integer": { + } + }, + "valueType": { + "integer": { + } + }, + "valueContainsNull": true + } } }, "common": { @@ -1021,57 +1041,66 @@ }, { "literal": { "array": { - "elementType": { - "array": { - "elementType": { - "integer": { - } - } - } - }, "elements": [{ "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 1 }, { "integer": 2 }, { "integer": 3 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + } + } } }, { "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 4 }, { "integer": 5 }, { "integer": 6 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + } + } } }, { "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 7 }, { "integer": 8 }, { "integer": 9 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + } + } } - }] + }], + "dataType": { + "elementType": { + "array": { + "elementType": { + "integer": { + } + } + } + }, + "containsNull": true + } } }, "common": { @@ -1094,30 +1123,8 @@ }, { "literal": { "array": { - "elementType": { - "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - } - } - }, "elements": [{ "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - }, "keys": [{ "string": "a" }, { @@ -1127,19 +1134,21 @@ "integer": 1 }, { "integer": 2 - }] + }], + "dataType": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } } }, { "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - }, "keys": [{ "string": "a" }, { @@ -1149,19 +1158,21 @@ "integer": 3 }, { "integer": 4 - }] + }], + "dataType": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } } }, { "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - }, "keys": [{ "string": "a" }, { @@ -1171,9 +1182,36 @@ "integer": 5 }, { "integer": 6 - }] + }], + "dataType": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } } - }] + }], + "dataType": { + "elementType": { + "map": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } + }, + "containsNull": true + } } }, "common": { @@ -1196,23 +1234,6 @@ }, { "literal": { "map": { - "keyType": { - "integer": { - } - }, - "valueType": { - "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - } - } - }, "keys": [{ "integer": 1 }, { @@ -1220,15 +1241,6 @@ }], "values": [{ "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - 
"integer": { - } - }, "keys": [{ "string": "a" }, { @@ -1238,19 +1250,21 @@ "integer": 1 }, { "integer": 2 - }] + }], + "dataType": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } } }, { "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - }, "keys": [{ "string": "a" }, { @@ -1260,9 +1274,40 @@ "integer": 3 }, { "integer": 4 - }] + }], + "dataType": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } } - }] + }], + "dataType": { + "keyType": { + "integer": { + } + }, + "valueType": { + "map": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } + }, + "valueContainsNull": true + } } }, "common": { @@ -1287,29 +1332,22 @@ "struct": { "elements": [{ "array": { - "elementType": { - "integer": { - } - }, "elements": [{ "integer": 1 }, { "integer": 2 }, { "integer": 3 - }] + }], + "dataType": { + "elementType": { + "integer": { + } + } + } } }, { "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - }, "keys": [{ "string": "a" }, { @@ -1319,7 +1357,18 @@ "integer": 1 }, { "integer": 2 - }] + }], + "dataType": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } } }, { "struct": { @@ -1327,15 +1376,6 @@ "string": "a" }, { "map": { - "keyType": { - "integer": { - } - }, - "valueType": { - "string": { - "collation": "UTF8_BINARY" - } - }, "keys": [{ "integer": 1 }, { @@ -1345,7 +1385,19 @@ "string": "a" }, { "string": "b" - }] + }], + "dataType": { + "keyType": { + "integer": { + } + }, + "valueType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueContainsNull": true + } } }], "dataTypeStruct": { @@ -1359,20 +1411,6 @@ "nullable": true }, { "name": "_2", - "dataType": { - "map": { - "keyType": { - "integer": { - } - }, - "valueType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueContainsNull": true - } - }, "nullable": true }] } @@ -1381,30 +1419,9 @@ "dataTypeStruct": { "fields": [{ "name": "_1", - "dataType": { - "array": { - "elementType": { - "integer": { - } - } - } - }, "nullable": true }, { "name": "_2", - "dataType": { - "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - } - } - }, "nullable": true }, { "name": "_3", diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin index da3a4a946d21..b3ebe8a79e3e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin differ diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverter.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverter.scala index f4c56d461bd2..e1d30240f867 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverter.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverter.scala @@ -105,16 +105,20 @@ object LiteralExpressionProtoConverter { expressions.Literal(lit.getTime.getNano, 
TimeType(precision))
 
       case proto.Expression.Literal.LiteralTypeCase.ARRAY =>
-        expressions.Literal.create(
-          LiteralValueProtoConverter.toCatalystArray(lit.getArray),
-          ArrayType(DataTypeProtoConverter.toCatalystType(lit.getArray.getElementType)))
+        val arrayData = LiteralValueProtoConverter.toCatalystArray(lit.getArray)
+        val dataType = DataTypeProtoConverter.toCatalystType(
+          proto.DataType.newBuilder
+            .setArray(LiteralValueProtoConverter.getProtoArrayType(lit.getArray))
+            .build())
+        expressions.Literal.create(arrayData, dataType)
 
       case proto.Expression.Literal.LiteralTypeCase.MAP =>
-        expressions.Literal.create(
-          LiteralValueProtoConverter.toCatalystMap(lit.getMap),
-          MapType(
-            DataTypeProtoConverter.toCatalystType(lit.getMap.getKeyType),
-            DataTypeProtoConverter.toCatalystType(lit.getMap.getValueType)))
+        val mapData = LiteralValueProtoConverter.toCatalystMap(lit.getMap)
+        val dataType = DataTypeProtoConverter.toCatalystType(
+          proto.DataType.newBuilder
+            .setMap(LiteralValueProtoConverter.getProtoMapType(lit.getMap))
+            .build())
+        expressions.Literal.create(mapData, dataType)
 
       case proto.Expression.Literal.LiteralTypeCase.STRUCT =>
         val structData = LiteralValueProtoConverter.toCatalystStruct(lit.getStruct)
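
Net effect on the server, sketched with helpers from this patch (`intType` is a local assumption, built as in the earlier sketches): whichever encoding the client sends, the planner now recovers a complete Catalyst type, and old-style protos that never carried nullability resolve to the permissive default, which the tests below pin down.

    import org.apache.spark.connect.proto
    import org.apache.spark.sql.connect.common.LiteralValueProtoConverter

    val intType: proto.DataType = proto.DataType
      .newBuilder()
      .setInteger(proto.DataType.Integer.newBuilder())
      .build()

    // Old-style proto: only element_type was sent...
    val oldStyle = proto.Expression.Literal.Array
      .newBuilder()
      .setElementType(intType)
      .build()

    // ...so the derived proto.DataType.Array defaults contains_null to true,
    // i.e. ArrayType(IntegerType, containsNull = true) after conversion.
    assert(LiteralValueProtoConverter.getProtoArrayType(oldStyle).getContainsNull)
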
diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala
index 71fcd2b39492..0af181e4be1a 100644
--- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala
+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala
@@ -21,15 +21,32 @@ import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
 
 import org.apache.spark.connect.proto
 import org.apache.spark.sql.connect.common.LiteralValueProtoConverter
+import org.apache.spark.sql.connect.common.LiteralValueProtoConverter.ToLiteralProtoOptions
+import org.apache.spark.sql.connect.planner.LiteralExpressionProtoConverter
 import org.apache.spark.sql.types._
 
 class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:ignore funsuite
 
+  private def toLiteralProto(v: Any): proto.Expression.Literal = {
+    LiteralValueProtoConverter
+      .toLiteralProtoWithOptions(
+        v,
+        None,
+        ToLiteralProtoOptions(useDeprecatedDataTypeFields = false))
+  }
+
+  private def toLiteralProto(v: Any, t: DataType): proto.Expression.Literal = {
+    LiteralValueProtoConverter
+      .toLiteralProtoWithOptions(
+        v,
+        Some(t),
+        ToLiteralProtoOptions(useDeprecatedDataTypeFields = false))
+  }
+
   test("basic proto value and catalyst value conversion") {
     val values = Array(null, true, 1.toByte, 1.toShort, 1, 1L, 1.1d, 1.1f, "spark")
     for (v <- values) {
-      assertResult(v)(
-        LiteralValueProtoConverter.toCatalystValue(LiteralValueProtoConverter.toLiteralProto(v)))
+      assertResult(v)(LiteralValueProtoConverter.toCatalystValue(toLiteralProto(v)))
     }
   }
 
@@ -62,10 +79,88 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i
     test(s"complex proto value and catalyst value conversion #$idx") {
       assertResult(v)(
         LiteralValueProtoConverter.toCatalystValue(
-          LiteralValueProtoConverter.toLiteralProto(v, t)))
+          LiteralValueProtoConverter.toLiteralProtoWithOptions(
+            v,
+            Some(t),
+            ToLiteralProtoOptions(useDeprecatedDataTypeFields = false))))
+    }
+
+    test(s"complex proto value and catalyst value conversion #$idx - backward compatibility") {
+      assertResult(v)(
+        LiteralValueProtoConverter.toCatalystValue(
+          LiteralValueProtoConverter.toLiteralProtoWithOptions(
+            v,
+            Some(t),
+            ToLiteralProtoOptions(useDeprecatedDataTypeFields = true))))
     }
   }
 
+  test("backward compatibility for array literal proto") {
+    // Test the old way of defining arrays with elementType field and elements
+    val arrayProto = proto.Expression.Literal.Array
+      .newBuilder()
+      .setElementType(
+        proto.DataType
+          .newBuilder()
+          .setInteger(proto.DataType.Integer.newBuilder())
+          .build())
+      .addElements(toLiteralProto(1))
+      .addElements(toLiteralProto(2))
+      .addElements(toLiteralProto(3))
+      .build()
+
+    val literalProto = proto.Expression.Literal.newBuilder().setArray(arrayProto).build()
+    val literal = LiteralExpressionProtoConverter.toCatalystExpression(literalProto)
+    assert(literal.dataType.isInstanceOf[ArrayType])
+    assert(literal.dataType.asInstanceOf[ArrayType].elementType == IntegerType)
+    // The containsNull field is always set to true when using the old way of defining arrays.
+    assert(literal.dataType.asInstanceOf[ArrayType].containsNull)
+
+    val arrayData = literal.value.asInstanceOf[org.apache.spark.sql.catalyst.util.ArrayData]
+    assert(arrayData.numElements() == 3)
+    assert(arrayData.getInt(0) == 1)
+    assert(arrayData.getInt(1) == 2)
+    assert(arrayData.getInt(2) == 3)
+  }
+
+  test("backward compatibility for map literal proto") {
+    // Test the old way of defining maps with keyType and valueType fields
+    val mapProto = proto.Expression.Literal.Map
+      .newBuilder()
+      .setKeyType(
+        proto.DataType
+          .newBuilder()
+          .setString(proto.DataType.String.newBuilder())
+          .build())
+      .setValueType(
+        proto.DataType
+          .newBuilder()
+          .setInteger(proto.DataType.Integer.newBuilder())
+          .build())
+      .addKeys(toLiteralProto("a"))
+      .addKeys(toLiteralProto("b"))
+      .addValues(toLiteralProto(1))
+      .addValues(toLiteralProto(2))
+      .build()
+
+    val literalProto = proto.Expression.Literal.newBuilder().setMap(mapProto).build()
+    val literal = LiteralExpressionProtoConverter.toCatalystExpression(literalProto)
+    assert(literal.dataType.isInstanceOf[MapType])
+    assert(literal.dataType.asInstanceOf[MapType].keyType == StringType)
+    assert(literal.dataType.asInstanceOf[MapType].valueType == IntegerType)
+    // The valueContainsNull field is always set to true when using the old way of defining maps.
+    assert(literal.dataType.asInstanceOf[MapType].valueContainsNull)
+
+    val mapData = literal.value.asInstanceOf[org.apache.spark.sql.catalyst.util.MapData]
+    assert(mapData.numElements() == 2)
+    val keys = mapData.keyArray()
+    val values = mapData.valueArray()
+    assert(keys.getUTF8String(0).toString == "a")
+    assert(values.getInt(0) == 1)
+    assert(keys.getUTF8String(1).toString == "b")
+    assert(values.getInt(1) == 2)
+  }
+
   test("backward compatibility for struct literal proto") {
     // Test the old way of defining structs with structType field and elements
     val structTypeProto = proto.DataType.Struct
@@ -120,7 +215,7 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i
   }
 
   test("data types of struct fields are not set for inferable types") {
-    val literalProto = LiteralValueProtoConverter.toLiteralProto(
+    val literalProto = toLiteralProto(
       (1, 2.0, true, (1, 2)),
       StructType(
         Seq(
@@ -137,7 +232,7 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i
   }
 
   test("data types of struct fields are set for non-inferable types") {
-    val literalProto = LiteralValueProtoConverter.toLiteralProto(
+    val literalProto = toLiteralProto(
      ("string", Decimal(1)),
      StructType(Seq(StructField("a", StringType), StructField("b", DecimalType(10, 2)))))
     assert(literalProto.getStruct.getDataTypeStruct.getFieldsList.get(0).hasDataType)
@@ -145,7 +240,7 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i
   }
 
   test("nullable and metadata fields are set for struct literal proto") {
-    val literalProto = LiteralValueProtoConverter.toLiteralProto(
+    val literalProto = toLiteralProto(
      ("string", Decimal(1)),
      StructType(Seq(
        StructField("a", StringType, nullable = true, Metadata.fromJson("""{"key": "value"}""")),