From 1c6c93d394c69dab0cd7bc500b04938542963c5a Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sat, 10 Apr 2021 12:27:38 +0300 Subject: [PATCH 1/3] Transfer ANSI intervals via Hive Thrift server --- .../SparkExecuteStatementOperation.scala | 10 ++++++++++ .../SparkThriftServerProtocolVersionsSuite.scala | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 8ca0ab91a73f7..58ed4c23b810b 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal +import org.apache.hadoop.hive.common.`type`.{HiveIntervalDayTime, HiveIntervalYearMonth} import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.shims.Utils import org.apache.hive.service.cli._ @@ -33,6 +34,7 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.{DataFrame, Row => SparkRow, SQLContext} +import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.execution.HiveResult.{getTimeFormatters, toHiveString, TimeFormatters} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.VariableSubstitution @@ -122,6 +124,12 @@ private[hive] class SparkExecuteStatementOperation( timeFormatters) case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] => to += toHiveString((from.get(ordinal), dataTypes(ordinal)), false, timeFormatters) + case YearMonthIntervalType => + val period = from.getAs[java.time.Period](ordinal) + to += new HiveIntervalYearMonth(IntervalUtils.periodToMonths(period)) + case DayTimeIntervalType => + val duration = from.getAs[java.time.Duration](ordinal) + to += new HiveIntervalDayTime(duration.getSeconds, duration.getNano) } } @@ -377,6 +385,8 @@ object SparkExecuteStatementOperation { val attrTypeString = field.dataType match { case NullType => "void" case CalendarIntervalType => StringType.catalogString + case YearMonthIntervalType => "interval_year_month" + case DayTimeIntervalType => "interval_day_time" case other => other.catalogString } new FieldSchema(field.name, attrTypeString, field.getComment.getOrElse("")) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index fd4d7231e8989..363436679a6c3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.thriftserver import java.sql.{Date, Timestamp} import java.util.{List => JList, Properties} +import org.apache.hadoop.hive.common.`type`.HiveIntervalDayTime import org.apache.hive.jdbc.{HiveConnection, HiveQueryResultSet} import org.apache.hive.service.auth.PlainSaslHelper import org.apache.hive.service.cli.GetInfoType @@ -458,5 +459,17 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftServer2TestBase { } } } + + test(s"SPARK-35017: $version get day-time interval type") { + testExecuteStatementWithProtocolVersion( + version, "SELECT date'2021-01-01' - date'2020-12-31' AS dt") { rs => + assert(rs.next()) + assert(rs.getObject(1) === new HiveIntervalDayTime(1, 0, 0, 0, 0)) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "dt") + assert(metaData.getColumnTypeName(1) === "interval_day_time") + assert(metaData.getColumnType(1) === java.sql.Types.OTHER) + } + } } } From 26a6235badf21646c25e6032da556583fba96ad7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 11 Apr 2021 18:40:27 +0300 Subject: [PATCH 2/3] Serialize ANSI intervals as strings --- .../java/org/apache/hive/service/cli/ColumnValue.java | 6 ++---- .../thriftserver/SparkExecuteStatementOperation.scala | 11 ++--------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnValue.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnValue.java index 85adf55df15e0..44d9e8a296452 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnValue.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnValue.java @@ -168,10 +168,6 @@ public static TColumnValue toTColumnValue(TypeDescriptor typeDescriptor, Object case TIMESTAMP_TYPE: // SPARK-31859, SPARK-31861: converted to string already in SparkExecuteStatementOperation return stringValue((String)value); - case INTERVAL_YEAR_MONTH_TYPE: - return stringValue((HiveIntervalYearMonth) value); - case INTERVAL_DAY_TIME_TYPE: - return stringValue((HiveIntervalDayTime) value); case DECIMAL_TYPE: String plainStr = value == null ? null : ((BigDecimal)value).toPlainString(); return stringValue(plainStr); @@ -183,6 +179,8 @@ public static TColumnValue toTColumnValue(TypeDescriptor typeDescriptor, Object case STRUCT_TYPE: case UNION_TYPE: case USER_DEFINED_TYPE: + case INTERVAL_YEAR_MONTH_TYPE: + case INTERVAL_DAY_TIME_TYPE: return stringValue((String)value); case NULL_TYPE: return stringValue((String)value); diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 58ed4c23b810b..6196f941c858a 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -25,7 +25,6 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal -import org.apache.hadoop.hive.common.`type`.{HiveIntervalDayTime, HiveIntervalYearMonth} import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.shims.Utils import org.apache.hive.service.cli._ @@ -34,7 +33,6 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.{DataFrame, Row => SparkRow, SQLContext} -import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.execution.HiveResult.{getTimeFormatters, toHiveString, TimeFormatters} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.VariableSubstitution @@ -122,14 +120,9 @@ private[hive] class SparkExecuteStatementOperation( (from.getAs[CalendarInterval](ordinal), CalendarIntervalType), false, timeFormatters) - case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] => + case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] | + YearMonthIntervalType | DayTimeIntervalType => to += toHiveString((from.get(ordinal), dataTypes(ordinal)), false, timeFormatters) - case YearMonthIntervalType => - val period = from.getAs[java.time.Period](ordinal) - to += new HiveIntervalYearMonth(IntervalUtils.periodToMonths(period)) - case DayTimeIntervalType => - val duration = from.getAs[java.time.Duration](ordinal) - to += new HiveIntervalDayTime(duration.getSeconds, duration.getNano) } } From feeeea5a2d2672c274ba626ddf1c31f6d082efc4 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 11 Apr 2021 18:45:03 +0300 Subject: [PATCH 3/3] Extend supportedType --- .../sql/hive/thriftserver/SparkGetTypeInfoOperation.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala index bd6feeaff08e8..cecb0dec72c80 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala @@ -99,6 +99,7 @@ private[hive] object SparkGetTypeInfoUtil { TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, DATE_TYPE, TIMESTAMP_TYPE, - ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, CHAR_TYPE, VARCHAR_TYPE) + ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, CHAR_TYPE, VARCHAR_TYPE, + INTERVAL_YEAR_MONTH_TYPE, INTERVAL_DAY_TIME_TYPE) } }