-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-30808][SQL] Enable Java 8 time API in Thrift server #27552
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
916838a
d98cdbc
580dd09
deaef58
804e709
80f4c89
d3131a5
e68e622
e491f96
95597a7
2086bbf
69f63b2
cdb322d
5ac5e77
ce77628
f9112b7
2129f30
902f9d8
880b1de
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,9 +21,10 @@ import java.nio.charset.StandardCharsets | |
| import java.sql.{Date, Timestamp} | ||
| import java.time.{Instant, LocalDate} | ||
|
|
||
| import org.apache.spark.sql.Row | ||
| import org.apache.spark.sql.{Dataset, Row} | ||
| import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} | ||
| import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand} | ||
| import org.apache.spark.sql.functions._ | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.unsafe.types.CalendarInterval | ||
|
|
@@ -36,27 +37,43 @@ object HiveResult { | |
| * Returns the result as a hive compatible sequence of strings. This is used in tests and | ||
| * `SparkSQLDriver` for CLI applications. | ||
| */ | ||
| def hiveResultString(executedPlan: SparkPlan): Seq[String] = executedPlan match { | ||
| case ExecutedCommandExec(_: DescribeCommandBase) => | ||
| // If it is a describe command for a Hive table, we want to have the output format | ||
| // be similar with Hive. | ||
| executedPlan.executeCollectPublic().map { | ||
| case Row(name: String, dataType: String, comment) => | ||
| Seq(name, dataType, | ||
| Option(comment.asInstanceOf[String]).getOrElse("")) | ||
| .map(s => String.format(s"%-20s", s)) | ||
| .mkString("\t") | ||
| } | ||
| // SHOW TABLES in Hive only output table names, while ours output database, table name, isTemp. | ||
| case command @ ExecutedCommandExec(s: ShowTablesCommand) if !s.isExtended => | ||
| command.executeCollect().map(_.getString(1)) | ||
| case other => | ||
| val result: Seq[Seq[Any]] = other.executeCollectPublic().map(_.toSeq).toSeq | ||
| // We need the types so we can output struct field names | ||
| val types = executedPlan.output.map(_.dataType) | ||
| // Reformat to match hive tab delimited output. | ||
| result.map(_.zip(types).map(e => toHiveString(e))) | ||
| .map(_.mkString("\t")) | ||
| def hiveResultString(ds: Dataset[_]): Seq[String] = { | ||
| val executedPlan = ds.queryExecution.executedPlan | ||
| executedPlan match { | ||
| case ExecutedCommandExec(_: DescribeCommandBase) => | ||
| // If it is a describe command for a Hive table, we want to have the output format | ||
| // be similar with Hive. | ||
| executedPlan.executeCollectPublic().map { | ||
| case Row(name: String, dataType: String, comment) => | ||
| Seq(name, dataType, | ||
| Option(comment.asInstanceOf[String]).getOrElse("")) | ||
| .map(s => String.format(s"%-20s", s)) | ||
| .mkString("\t") | ||
| } | ||
| // SHOW TABLES in Hive only output table names, | ||
| // while ours output database, table name, isTemp. | ||
| case command @ ExecutedCommandExec(s: ShowTablesCommand) if !s.isExtended => | ||
| command.executeCollect().map(_.getString(1)) | ||
| case _ => | ||
| val sessionWithJava8DatetimeEnabled = { | ||
| val cloned = ds.sparkSession.cloneSession() | ||
| cloned.conf.set(SQLConf.DATETIME_JAVA8API_ENABLED.key, true) | ||
| cloned | ||
| } | ||
| sessionWithJava8DatetimeEnabled.withActive { | ||
| // We cannot collect the original dataset because its encoders could be created | ||
| // with disabled Java 8 date-time API. | ||
| val result: Seq[Seq[Any]] = Dataset.ofRows(ds.sparkSession, ds.logicalPlan) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. found a problem. |
||
| .queryExecution | ||
| .executedPlan | ||
| .executeCollectPublic().map(_.toSeq).toSeq | ||
| // We need the types so we can output struct field names | ||
| val types = executedPlan.output.map(_.dataType) | ||
| // Reformat to match hive tab delimited output. | ||
| result.map(_.zip(types).map(e => toHiveString(e))) | ||
| .map(_.mkString("\t")) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private lazy val zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -378,7 +378,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { | |
| localSparkSession.conf.set(SQLConf.ANSI_ENABLED.key, true) | ||
| case _ => | ||
| } | ||
| localSparkSession.conf.set(SQLConf.DATETIME_JAVA8API_ENABLED.key, true) | ||
|
|
||
| if (configSet.nonEmpty) { | ||
| // Execute the list of set operation in order to add the desired configs | ||
|
|
@@ -512,7 +511,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { | |
| val schema = df.schema.catalogString | ||
| // Get answer, but also get rid of the #1234 expression ids that show up in explain plans | ||
| val answer = SQLExecution.withNewExecutionId(df.queryExecution, Some(sql)) { | ||
| hiveResultString(df.queryExecution.executedPlan).map(replaceNotIncludedMsg) | ||
| hiveResultString(df).map(replaceNotIncludedMsg) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in We should follow pgsql and return java 8 datetime when the config is enabled. https://jdbc.postgresql.org/documentation/head/8-date-time.html |
||
| } | ||
|
|
||
| // If the output is not pre-sorted, sort it. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.thriftserver | |
|
|
||
| import java.security.PrivilegedExceptionAction | ||
| import java.sql.{Date, Timestamp} | ||
| import java.time.{Instant, LocalDate} | ||
| import java.util.{Arrays, Map => JMap, UUID} | ||
| import java.util.concurrent.RejectedExecutionException | ||
|
|
||
|
|
@@ -178,7 +179,14 @@ private[hive] class SparkExecuteStatementOperation( | |
| } | ||
| curCol += 1 | ||
| } | ||
| resultRowSet.addRow(row.toArray.asInstanceOf[Array[Object]]) | ||
| // Convert date-time instances to types that are acceptable by Hive libs | ||
| // used in conversions to strings. | ||
| val resultRow = row.map { | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| case i: Instant => Timestamp.from(i) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There seems no java8 datetime values to be add to the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you help fix it? I think we should output java8 datetime values if the config is enabled.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are limited by |
||
| case ld: LocalDate => Date.valueOf(ld) | ||
| case other => other | ||
| }.toArray.asInstanceOf[Array[Object]] | ||
| resultRowSet.addRow(resultRow) | ||
| curRow += 1 | ||
| resultOffset += 1 | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is this always
true? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the old
Date/Timestamp doesn't follow the new calendar and may produce wrong strings for some date/timestamp values. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh wait, we format
Date/Timestamp with our own formatter, so this should be no problem. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed this line and ran
SQLQueryTestSuite with the cases above, and the results are the same. Or does this problem only exist for the spark-sql script? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only when thrift-server is involved in the loop.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I also passed these tests through
ThriftServerQueryTestSuite. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is also correct with
bin/spark-sql. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I tested it too and it looks fine. Maybe some refactoring of how we format the old
Date/Timestamp fixes it already. @yaooqinn can you send a PR to revert it? Let's see if all tests pass.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK