diff --git a/python/pyspark/sql/connect/proto/types_pb2.py b/python/pyspark/sql/connect/proto/types_pb2.py index e7c0e7726ef1..3f43bf35af40 100644 --- a/python/pyspark/sql/connect/proto/types_pb2.py +++ b/python/pyspark/sql/connect/proto/types_pb2.py @@ -35,7 +35,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x19spark/connect/types.proto\x12\rspark.connect"\x85"\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01 \x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02 \x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03 \x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04 \x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05 \x01(\x0b\x32\x1d.spark.connect.DataType.ShortH\x00R\x05short\x12;\n\x07integer\x18\x06 \x01(\x0b\x32\x1f.spark.connect.DataType.IntegerH\x00R\x07integer\x12\x32\n\x04long\x18\x07 \x01(\x0b\x32\x1c.spark.connect.DataType.LongH\x00R\x04long\x12\x35\n\x05\x66loat\x18\x08 \x01(\x0b\x32\x1d.spark.connect.DataType.FloatH\x00R\x05\x66loat\x12\x38\n\x06\x64ouble\x18\t \x01(\x0b\x32\x1e.spark.connect.DataType.DoubleH\x00R\x06\x64ouble\x12;\n\x07\x64\x65\x63imal\x18\n \x01(\x0b\x32\x1f.spark.connect.DataType.DecimalH\x00R\x07\x64\x65\x63imal\x12\x38\n\x06string\x18\x0b \x01(\x0b\x32\x1e.spark.connect.DataType.StringH\x00R\x06string\x12\x32\n\x04\x63har\x18\x0c \x01(\x0b\x32\x1c.spark.connect.DataType.CharH\x00R\x04\x63har\x12<\n\x08var_char\x18\r \x01(\x0b\x32\x1f.spark.connect.DataType.VarCharH\x00R\x07varChar\x12\x32\n\x04\x64\x61te\x18\x0e \x01(\x0b\x32\x1c.spark.connect.DataType.DateH\x00R\x04\x64\x61te\x12\x41\n\ttimestamp\x18\x0f \x01(\x0b\x32!.spark.connect.DataType.TimestampH\x00R\ttimestamp\x12K\n\rtimestamp_ntz\x18\x10 \x01(\x0b\x32$.spark.connect.DataType.TimestampNTZH\x00R\x0ctimestampNtz\x12W\n\x11\x63\x61lendar_interval\x18\x11 \x01(\x0b\x32(.spark.connect.DataType.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12[\n\x13year_month_interval\x18\x12 \x01(\x0b\x32).spark.connect.DataType.YearMonthIntervalH\x00R\x11yearMonthInterval\x12U\n\x11\x64\x61y_time_interval\x18\x13 \x01(\x0b\x32\'.spark.connect.DataType.DayTimeIntervalH\x00R\x0f\x64\x61yTimeInterval\x12\x35\n\x05\x61rray\x18\x14 \x01(\x0b\x32\x1d.spark.connect.DataType.ArrayH\x00R\x05\x61rray\x12\x38\n\x06struct\x18\x15 \x01(\x0b\x32\x1e.spark.connect.DataType.StructH\x00R\x06struct\x12/\n\x03map\x18\x16 \x01(\x0b\x32\x1b.spark.connect.DataType.MapH\x00R\x03map\x12;\n\x07variant\x18\x19 \x01(\x0b\x32\x1f.spark.connect.DataType.VariantH\x00R\x07variant\x12/\n\x03udt\x18\x17 \x01(\x0b\x32\x1b.spark.connect.DataType.UDTH\x00R\x03udt\x12>\n\x08unparsed\x18\x18 \x01(\x0b\x32 .spark.connect.DataType.UnparsedH\x00R\x08unparsed\x1a\x43\n\x07\x42oolean\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04\x42yte\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x41\n\x05Short\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x43\n\x07Integer\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04Long\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x41\n\x05\x46loat\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x42\n\x06\x44ouble\x12\x38\n\x18type_variation_reference\x18\x01 
\x01(\rR\x16typeVariationReference\x1a`\n\x06String\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x12\x1c\n\tcollation\x18\x02 \x01(\tR\tcollation\x1a\x42\n\x06\x42inary\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04NULL\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x45\n\tTimestamp\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04\x44\x61te\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1aH\n\x0cTimestampNTZ\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1aL\n\x10\x43\x61lendarInterval\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\xb3\x01\n\x11YearMonthInterval\x12$\n\x0bstart_field\x18\x01 \x01(\x05H\x00R\nstartField\x88\x01\x01\x12 \n\tend_field\x18\x02 \x01(\x05H\x01R\x08\x65ndField\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1a\xb1\x01\n\x0f\x44\x61yTimeInterval\x12$\n\x0bstart_field\x18\x01 \x01(\x05H\x00R\nstartField\x88\x01\x01\x12 \n\tend_field\x18\x02 \x01(\x05H\x01R\x08\x65ndField\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1aX\n\x04\x43har\x12\x16\n\x06length\x18\x01 \x01(\x05R\x06length\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a[\n\x07VarChar\x12\x16\n\x06length\x18\x01 \x01(\x05R\x06length\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a\x99\x01\n\x07\x44\x65\x63imal\x12\x19\n\x05scale\x18\x01 \x01(\x05H\x00R\x05scale\x88\x01\x01\x12!\n\tprecision\x18\x02 \x01(\x05H\x01R\tprecision\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x08\n\x06_scaleB\x0c\n\n_precision\x1a\xa1\x01\n\x0bStructField\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x34\n\tdata_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x08\x64\x61taType\x12\x1a\n\x08nullable\x18\x03 \x01(\x08R\x08nullable\x12\x1f\n\x08metadata\x18\x04 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x7f\n\x06Struct\x12;\n\x06\x66ields\x18\x01 \x03(\x0b\x32#.spark.connect.DataType.StructFieldR\x06\x66ields\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a\xa2\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12#\n\rcontains_null\x18\x02 \x01(\x08R\x0c\x63ontainsNull\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReference\x1a\xdb\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12.\n\x13value_contains_null\x18\x03 \x01(\x08R\x11valueContainsNull\x12\x38\n\x18type_variation_reference\x18\x04 \x01(\rR\x16typeVariationReference\x1a\x43\n\x07Variant\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\xa1\x02\n\x03UDT\x12\x12\n\x04type\x18\x01 \x01(\tR\x04type\x12 \n\tjvm_class\x18\x02 \x01(\tH\x00R\x08jvmClass\x88\x01\x01\x12&\n\x0cpython_class\x18\x03 \x01(\tH\x01R\x0bpythonClass\x88\x01\x01\x12;\n\x17serialized_python_class\x18\x04 \x01(\tH\x02R\x15serializedPythonClass\x88\x01\x01\x12\x37\n\x08sql_type\x18\x05 
\x01(\x0b\x32\x17.spark.connect.DataTypeH\x03R\x07sqlType\x88\x01\x01\x42\x0c\n\n_jvm_classB\x0f\n\r_python_classB\x1a\n\x18_serialized_python_classB\x0b\n\t_sql_type\x1a\x34\n\x08Unparsed\x12(\n\x10\x64\x61ta_type_string\x18\x01 \x01(\tR\x0e\x64\x61taTypeStringB\x06\n\x04kindJ\x04\x08\x1a\x10\x1bJ\x04\x08\x1b\x10\x1c\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xac#\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01 \x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02 \x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03 \x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04 \x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05 \x01(\x0b\x32\x1d.spark.connect.DataType.ShortH\x00R\x05short\x12;\n\x07integer\x18\x06 \x01(\x0b\x32\x1f.spark.connect.DataType.IntegerH\x00R\x07integer\x12\x32\n\x04long\x18\x07 \x01(\x0b\x32\x1c.spark.connect.DataType.LongH\x00R\x04long\x12\x35\n\x05\x66loat\x18\x08 \x01(\x0b\x32\x1d.spark.connect.DataType.FloatH\x00R\x05\x66loat\x12\x38\n\x06\x64ouble\x18\t \x01(\x0b\x32\x1e.spark.connect.DataType.DoubleH\x00R\x06\x64ouble\x12;\n\x07\x64\x65\x63imal\x18\n \x01(\x0b\x32\x1f.spark.connect.DataType.DecimalH\x00R\x07\x64\x65\x63imal\x12\x38\n\x06string\x18\x0b \x01(\x0b\x32\x1e.spark.connect.DataType.StringH\x00R\x06string\x12\x32\n\x04\x63har\x18\x0c \x01(\x0b\x32\x1c.spark.connect.DataType.CharH\x00R\x04\x63har\x12<\n\x08var_char\x18\r \x01(\x0b\x32\x1f.spark.connect.DataType.VarCharH\x00R\x07varChar\x12\x32\n\x04\x64\x61te\x18\x0e \x01(\x0b\x32\x1c.spark.connect.DataType.DateH\x00R\x04\x64\x61te\x12\x41\n\ttimestamp\x18\x0f \x01(\x0b\x32!.spark.connect.DataType.TimestampH\x00R\ttimestamp\x12K\n\rtimestamp_ntz\x18\x10 \x01(\x0b\x32$.spark.connect.DataType.TimestampNTZH\x00R\x0ctimestampNtz\x12W\n\x11\x63\x61lendar_interval\x18\x11 \x01(\x0b\x32(.spark.connect.DataType.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12[\n\x13year_month_interval\x18\x12 \x01(\x0b\x32).spark.connect.DataType.YearMonthIntervalH\x00R\x11yearMonthInterval\x12U\n\x11\x64\x61y_time_interval\x18\x13 \x01(\x0b\x32'.spark.connect.DataType.DayTimeIntervalH\x00R\x0f\x64\x61yTimeInterval\x12\x35\n\x05\x61rray\x18\x14 \x01(\x0b\x32\x1d.spark.connect.DataType.ArrayH\x00R\x05\x61rray\x12\x38\n\x06struct\x18\x15 \x01(\x0b\x32\x1e.spark.connect.DataType.StructH\x00R\x06struct\x12/\n\x03map\x18\x16 \x01(\x0b\x32\x1b.spark.connect.DataType.MapH\x00R\x03map\x12;\n\x07variant\x18\x19 \x01(\x0b\x32\x1f.spark.connect.DataType.VariantH\x00R\x07variant\x12/\n\x03udt\x18\x17 \x01(\x0b\x32\x1b.spark.connect.DataType.UDTH\x00R\x03udt\x12>\n\x08unparsed\x18\x18 \x01(\x0b\x32 .spark.connect.DataType.UnparsedH\x00R\x08unparsed\x12\x32\n\x04time\x18\x1c \x01(\x0b\x32\x1c.spark.connect.DataType.TimeH\x00R\x04time\x1a\x43\n\x07\x42oolean\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04\x42yte\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x41\n\x05Short\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x43\n\x07Integer\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04Long\x12\x38\n\x18type_variation_reference\x18\x01 
\x01(\rR\x16typeVariationReference\x1a\x41\n\x05\x46loat\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x42\n\x06\x44ouble\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a`\n\x06String\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x12\x1c\n\tcollation\x18\x02 \x01(\tR\tcollation\x1a\x42\n\x06\x42inary\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04NULL\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x45\n\tTimestamp\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04\x44\x61te\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1aH\n\x0cTimestampNTZ\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1aq\n\x04Time\x12!\n\tprecision\x18\x01 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReferenceB\x0c\n\n_precision\x1aL\n\x10\x43\x61lendarInterval\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\xb3\x01\n\x11YearMonthInterval\x12$\n\x0bstart_field\x18\x01 \x01(\x05H\x00R\nstartField\x88\x01\x01\x12 \n\tend_field\x18\x02 \x01(\x05H\x01R\x08\x65ndField\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1a\xb1\x01\n\x0f\x44\x61yTimeInterval\x12$\n\x0bstart_field\x18\x01 \x01(\x05H\x00R\nstartField\x88\x01\x01\x12 \n\tend_field\x18\x02 \x01(\x05H\x01R\x08\x65ndField\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1aX\n\x04\x43har\x12\x16\n\x06length\x18\x01 \x01(\x05R\x06length\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a[\n\x07VarChar\x12\x16\n\x06length\x18\x01 \x01(\x05R\x06length\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a\x99\x01\n\x07\x44\x65\x63imal\x12\x19\n\x05scale\x18\x01 \x01(\x05H\x00R\x05scale\x88\x01\x01\x12!\n\tprecision\x18\x02 \x01(\x05H\x01R\tprecision\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x08\n\x06_scaleB\x0c\n\n_precision\x1a\xa1\x01\n\x0bStructField\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x34\n\tdata_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x08\x64\x61taType\x12\x1a\n\x08nullable\x18\x03 \x01(\x08R\x08nullable\x12\x1f\n\x08metadata\x18\x04 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x7f\n\x06Struct\x12;\n\x06\x66ields\x18\x01 \x03(\x0b\x32#.spark.connect.DataType.StructFieldR\x06\x66ields\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a\xa2\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12#\n\rcontains_null\x18\x02 \x01(\x08R\x0c\x63ontainsNull\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReference\x1a\xdb\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12.\n\x13value_contains_null\x18\x03 \x01(\x08R\x11valueContainsNull\x12\x38\n\x18type_variation_reference\x18\x04 \x01(\rR\x16typeVariationReference\x1a\x43\n\x07Variant\x12\x38\n\x18type_variation_reference\x18\x01 
\x01(\rR\x16typeVariationReference\x1a\xa1\x02\n\x03UDT\x12\x12\n\x04type\x18\x01 \x01(\tR\x04type\x12 \n\tjvm_class\x18\x02 \x01(\tH\x00R\x08jvmClass\x88\x01\x01\x12&\n\x0cpython_class\x18\x03 \x01(\tH\x01R\x0bpythonClass\x88\x01\x01\x12;\n\x17serialized_python_class\x18\x04 \x01(\tH\x02R\x15serializedPythonClass\x88\x01\x01\x12\x37\n\x08sql_type\x18\x05 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x03R\x07sqlType\x88\x01\x01\x42\x0c\n\n_jvm_classB\x0f\n\r_python_classB\x1a\n\x18_serialized_python_classB\x0b\n\t_sql_type\x1a\x34\n\x08Unparsed\x12(\n\x10\x64\x61ta_type_string\x18\x01 \x01(\tR\x0e\x64\x61taTypeStringB\x06\n\x04kindJ\x04\x08\x1a\x10\x1bJ\x04\x08\x1b\x10\x1c\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3" ) _globals = globals() @@ -47,57 +47,59 @@ "DESCRIPTOR" ]._serialized_options = b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" _globals["_DATATYPE"]._serialized_start = 45 - _globals["_DATATYPE"]._serialized_end = 4402 - _globals["_DATATYPE_BOOLEAN"]._serialized_start = 1595 - _globals["_DATATYPE_BOOLEAN"]._serialized_end = 1662 - _globals["_DATATYPE_BYTE"]._serialized_start = 1664 - _globals["_DATATYPE_BYTE"]._serialized_end = 1728 - _globals["_DATATYPE_SHORT"]._serialized_start = 1730 - _globals["_DATATYPE_SHORT"]._serialized_end = 1795 - _globals["_DATATYPE_INTEGER"]._serialized_start = 1797 - _globals["_DATATYPE_INTEGER"]._serialized_end = 1864 - _globals["_DATATYPE_LONG"]._serialized_start = 1866 - _globals["_DATATYPE_LONG"]._serialized_end = 1930 - _globals["_DATATYPE_FLOAT"]._serialized_start = 1932 - _globals["_DATATYPE_FLOAT"]._serialized_end = 1997 - _globals["_DATATYPE_DOUBLE"]._serialized_start = 1999 - _globals["_DATATYPE_DOUBLE"]._serialized_end = 2065 - _globals["_DATATYPE_STRING"]._serialized_start = 2067 - _globals["_DATATYPE_STRING"]._serialized_end = 2163 - _globals["_DATATYPE_BINARY"]._serialized_start = 2165 - _globals["_DATATYPE_BINARY"]._serialized_end = 2231 - _globals["_DATATYPE_NULL"]._serialized_start = 2233 - _globals["_DATATYPE_NULL"]._serialized_end = 2297 - _globals["_DATATYPE_TIMESTAMP"]._serialized_start = 2299 - _globals["_DATATYPE_TIMESTAMP"]._serialized_end = 2368 - _globals["_DATATYPE_DATE"]._serialized_start = 2370 - _globals["_DATATYPE_DATE"]._serialized_end = 2434 - _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_start = 2436 - _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_end = 2508 - _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_start = 2510 - _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_end = 2586 - _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_start = 2589 - _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_end = 2768 - _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_start = 2771 - _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_end = 2948 - _globals["_DATATYPE_CHAR"]._serialized_start = 2950 - _globals["_DATATYPE_CHAR"]._serialized_end = 3038 - _globals["_DATATYPE_VARCHAR"]._serialized_start = 3040 - _globals["_DATATYPE_VARCHAR"]._serialized_end = 3131 - _globals["_DATATYPE_DECIMAL"]._serialized_start = 3134 - _globals["_DATATYPE_DECIMAL"]._serialized_end = 3287 - _globals["_DATATYPE_STRUCTFIELD"]._serialized_start = 3290 - _globals["_DATATYPE_STRUCTFIELD"]._serialized_end = 3451 - _globals["_DATATYPE_STRUCT"]._serialized_start = 3453 - _globals["_DATATYPE_STRUCT"]._serialized_end = 3580 - _globals["_DATATYPE_ARRAY"]._serialized_start = 3583 - _globals["_DATATYPE_ARRAY"]._serialized_end = 3745 - _globals["_DATATYPE_MAP"]._serialized_start = 3748 - 
_globals["_DATATYPE_MAP"]._serialized_end = 3967 - _globals["_DATATYPE_VARIANT"]._serialized_start = 3969 - _globals["_DATATYPE_VARIANT"]._serialized_end = 4036 - _globals["_DATATYPE_UDT"]._serialized_start = 4039 - _globals["_DATATYPE_UDT"]._serialized_end = 4328 - _globals["_DATATYPE_UNPARSED"]._serialized_start = 4330 - _globals["_DATATYPE_UNPARSED"]._serialized_end = 4382 + _globals["_DATATYPE"]._serialized_end = 4569 + _globals["_DATATYPE_BOOLEAN"]._serialized_start = 1647 + _globals["_DATATYPE_BOOLEAN"]._serialized_end = 1714 + _globals["_DATATYPE_BYTE"]._serialized_start = 1716 + _globals["_DATATYPE_BYTE"]._serialized_end = 1780 + _globals["_DATATYPE_SHORT"]._serialized_start = 1782 + _globals["_DATATYPE_SHORT"]._serialized_end = 1847 + _globals["_DATATYPE_INTEGER"]._serialized_start = 1849 + _globals["_DATATYPE_INTEGER"]._serialized_end = 1916 + _globals["_DATATYPE_LONG"]._serialized_start = 1918 + _globals["_DATATYPE_LONG"]._serialized_end = 1982 + _globals["_DATATYPE_FLOAT"]._serialized_start = 1984 + _globals["_DATATYPE_FLOAT"]._serialized_end = 2049 + _globals["_DATATYPE_DOUBLE"]._serialized_start = 2051 + _globals["_DATATYPE_DOUBLE"]._serialized_end = 2117 + _globals["_DATATYPE_STRING"]._serialized_start = 2119 + _globals["_DATATYPE_STRING"]._serialized_end = 2215 + _globals["_DATATYPE_BINARY"]._serialized_start = 2217 + _globals["_DATATYPE_BINARY"]._serialized_end = 2283 + _globals["_DATATYPE_NULL"]._serialized_start = 2285 + _globals["_DATATYPE_NULL"]._serialized_end = 2349 + _globals["_DATATYPE_TIMESTAMP"]._serialized_start = 2351 + _globals["_DATATYPE_TIMESTAMP"]._serialized_end = 2420 + _globals["_DATATYPE_DATE"]._serialized_start = 2422 + _globals["_DATATYPE_DATE"]._serialized_end = 2486 + _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_start = 2488 + _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_end = 2560 + _globals["_DATATYPE_TIME"]._serialized_start = 2562 + _globals["_DATATYPE_TIME"]._serialized_end = 2675 + _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_start = 2677 + _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_end = 2753 + _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_start = 2756 + _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_end = 2935 + _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_start = 2938 + _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_end = 3115 + _globals["_DATATYPE_CHAR"]._serialized_start = 3117 + _globals["_DATATYPE_CHAR"]._serialized_end = 3205 + _globals["_DATATYPE_VARCHAR"]._serialized_start = 3207 + _globals["_DATATYPE_VARCHAR"]._serialized_end = 3298 + _globals["_DATATYPE_DECIMAL"]._serialized_start = 3301 + _globals["_DATATYPE_DECIMAL"]._serialized_end = 3454 + _globals["_DATATYPE_STRUCTFIELD"]._serialized_start = 3457 + _globals["_DATATYPE_STRUCTFIELD"]._serialized_end = 3618 + _globals["_DATATYPE_STRUCT"]._serialized_start = 3620 + _globals["_DATATYPE_STRUCT"]._serialized_end = 3747 + _globals["_DATATYPE_ARRAY"]._serialized_start = 3750 + _globals["_DATATYPE_ARRAY"]._serialized_end = 3912 + _globals["_DATATYPE_MAP"]._serialized_start = 3915 + _globals["_DATATYPE_MAP"]._serialized_end = 4134 + _globals["_DATATYPE_VARIANT"]._serialized_start = 4136 + _globals["_DATATYPE_VARIANT"]._serialized_end = 4203 + _globals["_DATATYPE_UDT"]._serialized_start = 4206 + _globals["_DATATYPE_UDT"]._serialized_end = 4495 + _globals["_DATATYPE_UNPARSED"]._serialized_start = 4497 + _globals["_DATATYPE_UNPARSED"]._serialized_end = 4549 # @@protoc_insertion_point(module_scope) diff --git 
a/python/pyspark/sql/connect/proto/types_pb2.pyi b/python/pyspark/sql/connect/proto/types_pb2.pyi index fcf35b8c1f19..d46770c4f888 100644 --- a/python/pyspark/sql/connect/proto/types_pb2.pyi +++ b/python/pyspark/sql/connect/proto/types_pb2.pyi @@ -279,6 +279,40 @@ class DataType(google.protobuf.message.Message): ], ) -> None: ... + class Time(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + PRECISION_FIELD_NUMBER: builtins.int + TYPE_VARIATION_REFERENCE_FIELD_NUMBER: builtins.int + precision: builtins.int + type_variation_reference: builtins.int + def __init__( + self, + *, + precision: builtins.int | None = ..., + type_variation_reference: builtins.int = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "_precision", b"_precision", "precision", b"precision" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "_precision", + b"_precision", + "precision", + b"precision", + "type_variation_reference", + b"type_variation_reference", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["_precision", b"_precision"] + ) -> typing_extensions.Literal["precision"] | None: ... + class CalendarInterval(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -788,6 +822,7 @@ class DataType(google.protobuf.message.Message): VARIANT_FIELD_NUMBER: builtins.int UDT_FIELD_NUMBER: builtins.int UNPARSED_FIELD_NUMBER: builtins.int + TIME_FIELD_NUMBER: builtins.int @property def null(self) -> global___DataType.NULL: ... @property @@ -845,6 +880,8 @@ class DataType(google.protobuf.message.Message): @property def unparsed(self) -> global___DataType.Unparsed: """UnparsedDataType""" + @property + def time(self) -> global___DataType.Time: ... def __init__( self, *, @@ -873,6 +910,7 @@ class DataType(google.protobuf.message.Message): variant: global___DataType.Variant | None = ..., udt: global___DataType.UDT | None = ..., unparsed: global___DataType.Unparsed | None = ..., + time: global___DataType.Time | None = ..., ) -> None: ... def HasField( self, @@ -915,6 +953,8 @@ class DataType(google.protobuf.message.Message): b"string", "struct", b"struct", + "time", + b"time", "timestamp", b"timestamp", "timestamp_ntz", @@ -972,6 +1012,8 @@ class DataType(google.protobuf.message.Message): b"string", "struct", b"struct", + "time", + b"time", "timestamp", b"timestamp", "timestamp_ntz", @@ -1017,6 +1059,7 @@ class DataType(google.protobuf.message.Message): "variant", "udt", "unparsed", + "time", ] | None ): ... 
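The regenerated Python stubs above mirror the proto change further down in this patch: DataType gains a Time message (oneof field 28) with an optional precision and the usual type_variation_reference. As a rough illustration of the resulting builder API on the JVM side (a minimal sketch against the generated org.apache.spark.connect.proto classes, not code taken from this patch), constructing and inspecting a TIME message looks like:

import org.apache.spark.connect.proto

// Build a TIME(6) DataType message; precision is optional, so leaving it
// unset lets the receiver fall back to the default precision.
val timeType: proto.DataType = proto.DataType
  .newBuilder()
  .setTime(proto.DataType.Time.newBuilder().setPrecision(6).build())
  .build()

// Inspect it the same way DataTypeProtoConverter does later in this patch:
// hasPrecision() distinguishes an explicit precision from the unset case.
assert(timeType.getKindCase == proto.DataType.KindCase.TIME)
val precision =
  if (timeType.getTime.hasPrecision) Some(timeType.getTime.getPrecision) else None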
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala index a159f74572f1..d0438c6ff1b4 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala @@ -135,4 +135,8 @@ object TimeFormatter { def apply(isParsing: Boolean): TimeFormatter = { getFormatter(None, defaultLocale, isParsing) } + + def getFractionFormatter(): TimeFormatter = { + new FractionTimeFormatter() + } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala index 587ca43e5730..888d72822bc5 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala @@ -58,6 +58,7 @@ private[sql] object ArrowUtils { case TimestampType => new ArrowType.Timestamp(TimeUnit.MICROSECOND, timeZoneId) case TimestampNTZType => new ArrowType.Timestamp(TimeUnit.MICROSECOND, null) + case _: TimeType => new ArrowType.Time(TimeUnit.NANOSECOND, 8 * 8) case NullType => ArrowType.Null.INSTANCE case _: YearMonthIntervalType => new ArrowType.Interval(IntervalUnit.YEAR_MONTH) case _: DayTimeIntervalType => new ArrowType.Duration(TimeUnit.MICROSECOND) @@ -88,6 +89,8 @@ private[sql] object ArrowUtils { if ts.getUnit == TimeUnit.MICROSECOND && ts.getTimezone == null => TimestampNTZType case ts: ArrowType.Timestamp if ts.getUnit == TimeUnit.MICROSECOND => TimestampType + case t: ArrowType.Time if t.getUnit == TimeUnit.NANOSECOND => + TimeType(TimeType.MICROS_PRECISION) case ArrowType.Null.INSTANCE => NullType case yi: ArrowType.Interval if yi.getUnit == IntervalUnit.YEAR_MONTH => YearMonthIntervalType() diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java index 7382d96e20ba..66116d7c952f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java @@ -182,6 +182,8 @@ void initAccessor(ValueVector vector) { accessor = new TimestampAccessor(timeStampMicroTZVector); } else if (vector instanceof TimeStampMicroVector timeStampMicroVector) { accessor = new TimestampNTZAccessor(timeStampMicroVector); + } else if (vector instanceof TimeNanoVector timeNanoVector) { + accessor = new TimeNanoAccessor(timeNanoVector); } else if (vector instanceof MapVector mapVector) { accessor = new MapAccessor(mapVector); } else if (vector instanceof ListVector listVector) { @@ -522,6 +524,21 @@ final long getLong(int rowId) { } } + static class TimeNanoAccessor extends ArrowVectorAccessor { + + private final TimeNanoVector accessor; + + TimeNanoAccessor(TimeNanoVector vector) { + super(vector); + this.accessor = vector; + } + + @Override + final long getLong(int rowId) { + return accessor.get(rowId); + } + } + static class ArrayAccessor extends ArrowVectorAccessor { private final ListVector accessor; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala index d91b6de9b1df..275fecebdafb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala @@ -68,6 +68,7 @@ object ArrowWriter { case (DateType, vector: DateDayVector) => new DateWriter(vector) case (TimestampType, vector: TimeStampMicroTZVector) => new TimestampWriter(vector) case (TimestampNTZType, vector: TimeStampMicroVector) => new TimestampNTZWriter(vector) + case (_: TimeType, vector: TimeNanoVector) => new TimeWriter(vector) case (ArrayType(_, _), vector: ListVector) => val elementVector = createFieldWriter(vector.getDataVector()) new ArrayWriter(vector, elementVector) @@ -359,6 +360,18 @@ private[arrow] class TimestampNTZWriter( } } +private[arrow] class TimeWriter( + val valueVector: TimeNanoVector) extends ArrowFieldWriter { + + override def setNull(): Unit = { + valueVector.setNull(count) + } + + override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { + valueVector.setSafe(count, input.getLong(ordinal)) + } +} + private[arrow] class ArrayWriter( val valueVector: ListVector, val elementWriter: ArrowFieldWriter) extends ArrowFieldWriter { diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala index 415960e83f9d..03e8d011b8d5 100644 --- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.connect import java.io.{ByteArrayOutputStream, PrintStream} import java.nio.file.Files -import java.time.DateTimeException +import java.time.{DateTimeException, LocalTime} import java.util.Properties import scala.collection.mutable @@ -1670,6 +1670,12 @@ class ClientE2ETestSuite } checkAnswer(df, (0 until 6).map(i => Row(i))) } + + test("SPARK-52770: Support Time type") { + val df = spark.sql("SELECT TIME '12:13:14'") + + checkAnswer(df, Row(LocalTime.of(12, 13, 14))) + } } private[sql] case class ClassData(a: String, b: Int) diff --git a/sql/connect/common/src/main/protobuf/spark/connect/types.proto b/sql/connect/common/src/main/protobuf/spark/connect/types.proto index db82cbe64a9b..1800e3885774 100644 --- a/sql/connect/common/src/main/protobuf/spark/connect/types.proto +++ b/sql/connect/common/src/main/protobuf/spark/connect/types.proto @@ -69,6 +69,8 @@ message DataType { // UnparsedDataType Unparsed unparsed = 24; + + Time time = 28; } // Reserved for geometry and geography types @@ -127,6 +129,11 @@ message DataType { uint32 type_variation_reference = 1; } + message Time { + optional int32 precision = 1; + uint32 type_variation_reference = 2; + } + message CalendarInterval { uint32 type_variation_reference = 1; } diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala index ceeece073da6..7597a0ceeb8c 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala @@ -200,6 +200,10 @@ object ArrowDeserializers { new LeafFieldDeserializer[LocalDateTime](encoder, v, timeZoneId) { override def value(i: Int): LocalDateTime = reader.getLocalDateTime(i) } + case (LocalTimeEncoder, v: FieldVector) => + new LeafFieldDeserializer[LocalTime](encoder, v, 
timeZoneId) { + override def value(i: Int): LocalTime = reader.getLocalTime(i) + } case (OptionEncoder(value), v) => val deserializer = deserializerFor(value, v, timeZoneId) diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowSerializer.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowSerializer.scala index d79fb25ec1a0..4acb11f014d1 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowSerializer.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowSerializer.scala @@ -20,7 +20,7 @@ import java.io.{ByteArrayOutputStream, OutputStream} import java.lang.invoke.{MethodHandles, MethodType} import java.math.{BigDecimal => JBigDecimal, BigInteger => JBigInteger} import java.nio.channels.Channels -import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period} +import java.time.{Duration, Instant, LocalDate, LocalDateTime, LocalTime, Period} import java.util.{Map => JMap, Objects} import scala.jdk.CollectionConverters._ @@ -392,6 +392,11 @@ object ArrowSerializer { override def set(index: Int, value: LocalDateTime): Unit = vector.setSafe(index, SparkDateTimeUtils.localDateTimeToMicros(value)) } + case (LocalTimeEncoder, v: TimeNanoVector) => + new FieldSerializer[LocalTime, TimeNanoVector](v) { + override def set(index: Int, value: LocalTime): Unit = + vector.setSafe(index, SparkDateTimeUtils.localTimeToNanos(value)) + } case (OptionEncoder(value), v) => new Serializer { diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala index 3dbfce18e7b4..d8927a6089aa 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala @@ -18,12 +18,12 @@ package org.apache.spark.sql.connect.client.arrow import java.math.{BigDecimal => JBigDecimal} import java.sql.{Date, Timestamp} -import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period, ZoneOffset} +import java.time.{Duration, Instant, LocalDate, LocalDateTime, LocalTime, Period, ZoneOffset} import org.apache.arrow.vector._ import org.apache.arrow.vector.util.Text -import org.apache.spark.sql.catalyst.util.{DateFormatter, SparkIntervalUtils, SparkStringUtils, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.{DateFormatter, SparkIntervalUtils, SparkStringUtils, TimeFormatter, TimestampFormatter} import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_SECOND import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE import org.apache.spark.sql.catalyst.util.SparkDateTimeUtils._ @@ -59,6 +59,7 @@ private[arrow] abstract class ArrowVectorReader { def getInstant(i: Int): java.time.Instant = unsupported() def getLocalDate(i: Int): java.time.LocalDate = unsupported() def getLocalDateTime(i: Int): java.time.LocalDateTime = unsupported() + def getLocalTime(i: Int): java.time.LocalTime = unsupported() private def unsupported(): Nothing = throw new UnsupportedOperationException() } @@ -90,6 +91,7 @@ object ArrowVectorReader { case v: DateDayVector => new DateDayVectorReader(v, timeZoneId) case v: TimeStampMicroTZVector => new TimeStampMicroTZVectorReader(v) case v: TimeStampMicroVector => new TimeStampMicroVectorReader(v, timeZoneId) + 
case v: TimeNanoVector => new TimeVectorReader(v) case _: NullVector => NullVectorReader case _ => throw new RuntimeException("Unsupported Vector Type: " + vector.getClass) } @@ -275,3 +277,11 @@ private[arrow] class TimeStampMicroVectorReader(v: TimeStampMicroVector, timeZon override def getLocalDateTime(i: Int): LocalDateTime = microsToLocalDateTime(utcMicros(i)) override def getString(i: Int): String = formatter.format(utcMicros(i)) } + +private[arrow] class TimeVectorReader(v: TimeNanoVector) + extends TypedArrowVectorReader[TimeNanoVector](v) { + private lazy val formatter = TimeFormatter.getFractionFormatter() + private def nanos(i: Int): Long = vector.get(i) + override def getLocalTime(i: Int): LocalTime = nanosToLocalTime(nanos(i)) + override def getString(i: Int): String = formatter.format(nanos(i)) +} diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala index 8c83ad3d1f55..4ff555c5645b 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala @@ -53,6 +53,12 @@ object DataTypeProtoConverter { case proto.DataType.KindCase.DATE => DateType case proto.DataType.KindCase.TIMESTAMP => TimestampType case proto.DataType.KindCase.TIMESTAMP_NTZ => TimestampNTZType + case proto.DataType.KindCase.TIME => + if (t.getTime.hasPrecision) { + TimeType(t.getTime.getPrecision) + } else { + TimeType() + } case proto.DataType.KindCase.CALENDAR_INTERVAL => CalendarIntervalType case proto.DataType.KindCase.YEAR_MONTH_INTERVAL => @@ -204,6 +210,12 @@ object DataTypeProtoConverter { case TimestampNTZType => ProtoDataTypes.TimestampNTZType + case TimeType(precision) => + proto.DataType + .newBuilder() + .setTime(proto.DataType.Time.newBuilder().setPrecision(precision).build()) + .build() + case CalendarIntervalType => ProtoDataTypes.CalendarIntervalType case YearMonthIntervalType(startField, endField) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala index 39c3d8df7550..a7fe6f67974c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala @@ -20,6 +20,7 @@ import java.io.{ByteArrayOutputStream, DataOutputStream, File} import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat +import java.time.LocalTime import java.util.Locale import com.google.common.io.Files @@ -731,6 +732,43 @@ class ArrowConvertersSuite extends SharedSparkSession { } } + test("time type conversion") { + val json = + s""" + |{ + | "schema" : { + | "fields" : [ { + | "name" : "time", + | "type" : { + | "name" : "time", + | "unit" : "NANOSECOND", + | "bitWidth" : 64 + | }, + | "nullable" : true, + | "children" : [ ] + | } ] + | }, + | "batches" : [ { + | "count" : 3, + | "columns" : [ { + | "name" : "time", + | "count" : 3, + | "VALIDITY" : [ 1, 1, 1 ], + | "DATA" : [ 0, 43200000000000, 3723123456789 ] + | } ] + | } ] + |} + """.stripMargin + + val t1 = LocalTime.of(0, 0, 0) + val t2 = LocalTime.of(12, 0, 0) + val t3 = LocalTime.of(1, 2, 3, 123456789) + + val df = Seq(t1, t2, t3).toDF("time") + + 
collectAndValidate(df, json, "timeData.json") + } + test("floating-point NaN") { val json = s""" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala index acf258a373c3..8bdc6ccc9a8d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala @@ -38,6 +38,7 @@ class ArrowWriterSuite extends SparkFunSuite { val datatype = dt match { case _: DayTimeIntervalType => DayTimeIntervalType() case _: YearMonthIntervalType => YearMonthIntervalType() + case _: TimeType => TimeType() case tpe => tpe } val schema = new StructType().add("value", datatype, nullable = true) @@ -67,6 +68,7 @@ class ArrowWriterSuite extends SparkFunSuite { case DateType => reader.getInt(rowId) case TimestampType => reader.getLong(rowId) case TimestampNTZType => reader.getLong(rowId) + case _: TimeType => reader.getLong(rowId) case _: YearMonthIntervalType => reader.getInt(rowId) case _: DayTimeIntervalType => reader.getLong(rowId) case CalendarIntervalType => reader.getInterval(rowId) @@ -91,6 +93,7 @@ class ArrowWriterSuite extends SparkFunSuite { check(DateType, Seq(0, 1, 2, null, 4)) check(TimestampType, Seq(0L, 3.6e9.toLong, null, 8.64e10.toLong), "America/Los_Angeles") check(TimestampNTZType, Seq(0L, 3.6e9.toLong, null, 8.64e10.toLong)) + DataTypeTestUtils.timeTypes.foreach(check(_, Seq(0L, 4.32e4.toLong, null, 3723123456789L))) check(NullType, Seq(null, null, null)) DataTypeTestUtils.yearMonthIntervalTypes .foreach(check(_, Seq(null, 0, 1, -1, Int.MaxValue, Int.MinValue))) @@ -109,6 +112,7 @@ class ArrowWriterSuite extends SparkFunSuite { val avroDatatype = dt match { case _: DayTimeIntervalType => DayTimeIntervalType() case _: YearMonthIntervalType => YearMonthIntervalType() + case _: TimeType => TimeType() case tpe => tpe } val schema = new StructType().add("value", avroDatatype, nullable = false) @@ -132,6 +136,7 @@ class ArrowWriterSuite extends SparkFunSuite { case DateType => reader.getInts(0, data.size) case TimestampType => reader.getLongs(0, data.size) case TimestampNTZType => reader.getLongs(0, data.size) + case _: TimeType => reader.getLongs(0, data.size) case _: YearMonthIntervalType => reader.getInts(0, data.size) case _: DayTimeIntervalType => reader.getLongs(0, data.size) } @@ -149,6 +154,7 @@ class ArrowWriterSuite extends SparkFunSuite { check(DateType, (0 until 10)) check(TimestampType, (0 until 10).map(_ * 4.32e10.toLong), "America/Los_Angeles") check(TimestampNTZType, (0 until 10).map(_ * 4.32e10.toLong)) + DataTypeTestUtils.timeTypes.foreach(check(_, (0 until 10).map(_ * 4.32e10.toLong))) DataTypeTestUtils.yearMonthIntervalTypes.foreach(check(_, (0 until 14))) DataTypeTestUtils.dayTimeIntervalTypes.foreach(check(_, (-10 until 10).map(_ * 1000.toLong))) }
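Taken together, the writer and reader changes above agree on one encoding: a TIME value crosses Arrow as a 64-bit nanosecond-of-day stored in a TimeNanoVector (TimeWriter and ArrowSerializer write it, TimeNanoAccessor and TimeVectorReader read it back). The following is a standalone round-trip sketch of that convention against the Arrow Java API, using LocalTime.toNanoOfDay/ofNanoOfDay in place of SparkDateTimeUtils.localTimeToNanos/nanosToLocalTime, so it illustrates the convention rather than reproducing code from this patch:

import java.time.LocalTime
import org.apache.arrow.memory.RootAllocator
import org.apache.arrow.vector.TimeNanoVector

val allocator = new RootAllocator(Long.MaxValue)
val vector = new TimeNanoVector("time", allocator)
vector.allocateNew(2)

// Write side: same convention as TimeWriter.setValue, nanoseconds since midnight.
vector.setSafe(0, LocalTime.of(12, 13, 14).toNanoOfDay)          // 43994000000000
vector.setSafe(1, LocalTime.of(1, 2, 3, 123456789).toNanoOfDay)  // 3723123456789
vector.setValueCount(2)

// Read side: same convention as TimeNanoAccessor.getLong / TimeVectorReader.getLocalTime.
assert(LocalTime.ofNanoOfDay(vector.get(1)) == LocalTime.of(1, 2, 3, 123456789))

vector.close()
allocator.close()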