From 3991b67b930cf7102b338b3696e513a93497b0fe Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 9 Aug 2019 15:58:25 +0500 Subject: [PATCH 01/18] Support microseconds arg at extract --- .../expressions/datetimeExpressions.scala | 19 +++++++++++++++++++ .../sql/catalyst/parser/AstBuilder.scala | 2 ++ .../sql/catalyst/util/DateTimeUtils.scala | 8 ++++++++ .../resources/sql-tests/inputs/extract.sql | 2 ++ .../sql-tests/results/extract.sql.out | 14 +++++++++++--- 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index e50abebe57987..5c6027dbe99c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1810,3 +1810,22 @@ case class MakeTimestamp( override def prettyName: String = "make_timestamp" } + +case class Microseconds(child: Expression, timeZoneId: Option[String] = None) + extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression { + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) + override def dataType: DataType = IntegerType + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + override protected def nullSafeEval(timestamp: Any): Any = { + DateTimeUtils.getMicroseconds(timestamp.asInstanceOf[Long], timeZone) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val tz = ctx.addReferenceObj("timeZone", timeZone) + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, c => s"$dtu.getMicroseconds($c, $tz)") + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 49ca09d9ef076..614ad4b0045f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1420,6 +1420,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging Minute(expression(ctx.source)) case "SECOND" => Second(expression(ctx.source)) + case "MICROSECONDS" => + Microseconds(expression(ctx.source)) case other => throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 10a7f9bd550e2..64c7756ac0e0a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -455,6 +455,14 @@ object DateTimeUtils { (MICROSECONDS.toSeconds(localTimestamp(microsec, timeZone)) % 60).toInt } + /** + * Returns seconds, including fractional parts, multiplied by 1 000 000. The timestamp + * is expressed in microseconds since the epoch. + */ + def getMicroseconds(timestamp: SQLTimestamp, timeZone: TimeZone): Int = { + Math.floorMod(localTimestamp(timestamp, timeZone), MICROS_PER_SECOND * 60).toInt + } + /** * Returns the 'day in year' value for the given date. 
The date is expressed in days * since 1.1.1970. diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql index 5dd3e6686f1cc..72d104ab7bf73 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/extract.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql @@ -24,4 +24,6 @@ select extract(minute from c) from t; select extract(second from c) from t; +select extract(microseconds from c) from t; + select extract(not_supported from c) from t; diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index 0ca7bdc09b019..3759fac835ef1 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 14 +-- Number of queries: 15 -- !query 0 @@ -107,10 +107,18 @@ struct -- !query 13 -select extract(not_supported from c) from t +select extract(microseconds from c) from t -- !query 13 schema -struct<> +struct -- !query 13 output +9123456 + + +-- !query 14 +select extract(not_supported from c) from t +-- !query 14 schema +struct<> +-- !query 14 output org.apache.spark.sql.catalyst.parser.ParseException Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7) From 330a5f228d432ce7cd27a1c00cabaef9be7fb321 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 9 Aug 2019 17:38:40 +0500 Subject: [PATCH 02/18] Support milliseconds arg at extract --- .../expressions/datetimeExpressions.scala | 57 ++++++++++++------- .../sql/catalyst/parser/AstBuilder.scala | 2 + .../sql/catalyst/util/DateTimeUtils.scala | 8 +++ .../resources/sql-tests/inputs/extract.sql | 2 + .../sql-tests/results/extract.sql.out | 20 +++++-- 5 files changed, 64 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 5c6027dbe99c0..dbbc648658559 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -302,6 +302,44 @@ case class Second(child: Expression, timeZoneId: Option[String] = None) } } +case class Milliseconds(child: Expression, timeZoneId: Option[String] = None) + extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression { + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) + override def dataType: DataType = IntegerType + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + override protected def nullSafeEval(timestamp: Any): Any = { + DateTimeUtils.getMilliseconds(timestamp.asInstanceOf[Long], timeZone) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val tz = ctx.addReferenceObj("timeZone", timeZone) + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, c => s"$dtu.getMilliseconds($c, $tz)") + } +} + +case class Microseconds(child: Expression, timeZoneId: Option[String] = None) + extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression { + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) + override def dataType: DataType = 
IntegerType + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + override protected def nullSafeEval(timestamp: Any): Any = { + DateTimeUtils.getMicroseconds(timestamp.asInstanceOf[Long], timeZone) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val tz = ctx.addReferenceObj("timeZone", timeZone) + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, c => s"$dtu.getMicroseconds($c, $tz)") + } +} + @ExpressionDescription( usage = "_FUNC_(date) - Returns the day of year of the date/timestamp.", examples = """ @@ -1810,22 +1848,3 @@ case class MakeTimestamp( override def prettyName: String = "make_timestamp" } - -case class Microseconds(child: Expression, timeZoneId: Option[String] = None) - extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression { - - override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) - override def dataType: DataType = IntegerType - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = - copy(timeZoneId = Option(timeZoneId)) - - override protected def nullSafeEval(timestamp: Any): Any = { - DateTimeUtils.getMicroseconds(timestamp.asInstanceOf[Long], timeZone) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val tz = ctx.addReferenceObj("timeZone", timeZone) - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getMicroseconds($c, $tz)") - } -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 614ad4b0045f2..713e87f166987 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1420,6 +1420,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging Minute(expression(ctx.source)) case "SECOND" => Second(expression(ctx.source)) + case "MILLISECONDS" => + Milliseconds(expression(ctx.source)) case "MICROSECONDS" => Microseconds(expression(ctx.source)) case other => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 64c7756ac0e0a..0a81a9c441c26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -455,6 +455,14 @@ object DateTimeUtils { (MICROSECONDS.toSeconds(localTimestamp(microsec, timeZone)) % 60).toInt } + /** + * Returns seconds, including fractional parts, multiplied by 1 000. The timestamp + * is expressed in microseconds since the epoch. + */ + def getMilliseconds(timestamp: SQLTimestamp, timeZone: TimeZone): Int = { + getMicroseconds(timestamp, timeZone) / MICROS_PER_MILLIS.toInt + } + /** * Returns seconds, including fractional parts, multiplied by 1 000 000. The timestamp * is expressed in microseconds since the epoch. 
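
For reference, the within-minute arithmetic behind getMilliseconds and getMicroseconds can be sketched outside Spark. This is a minimal illustration assuming only the constant values MICROS_PER_SECOND = 1000000 and MICROS_PER_MILLIS = 1000; the object and method names are illustrative, not part of the patch:

object FieldSketch {
  private val MicrosPerSecond = 1000000L
  private val MicrosPerMillis = 1000L

  // Remainder of the local timestamp modulo one minute of microseconds:
  // seconds within the current minute, scaled by 1000000. floorMod keeps
  // the result non-negative for pre-1970 timestamps, as in the patch.
  def microsecondsField(localMicros: Long): Int =
    Math.floorMod(localMicros, MicrosPerSecond * 60).toInt

  // The same remainder truncated to millisecond precision.
  def millisecondsField(localMicros: Long): Int =
    (microsecondsField(localMicros) / MicrosPerMillis).toInt

  def main(args: Array[String]): Unit = {
    // 9 seconds and 123456 microseconds into a minute, e.g. 07:08:09.123456:
    val micros = 9L * MicrosPerSecond + 123456L
    println(microsecondsField(micros)) // 9123456
    println(millisecondsField(micros)) // 9123
  }
}

This matches the values 9123 and 9123456 produced for '2011-05-06 07:08:09.1234567' in extract.sql.out below (the literal is truncated to microsecond precision when cast to timestamp).
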
diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql index 72d104ab7bf73..02e9ea497378f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/extract.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql @@ -24,6 +24,8 @@ select extract(minute from c) from t; select extract(second from c) from t; +select extract(milliseconds from c) from t; + select extract(microseconds from c) from t; select extract(not_supported from c) from t; diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index 3759fac835ef1..c7d74161742c1 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 15 +-- Number of queries: 16 -- !query 0 @@ -107,18 +107,26 @@ struct -- !query 13 -select extract(microseconds from c) from t +select extract(milliseconds from c) from t -- !query 13 schema -struct +struct -- !query 13 output -9123456 +9123 -- !query 14 -select extract(not_supported from c) from t +select extract(microseconds from c) from t -- !query 14 schema -struct<> +struct -- !query 14 output +9123456 + + +-- !query 15 +select extract(not_supported from c) from t +-- !query 15 schema +struct<> +-- !query 15 output org.apache.spark.sql.catalyst.parser.ParseException Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7) From 9c7de57794533a9939dd14f73c17be722c1a2dfd Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 10 Aug 2019 00:18:46 +0500 Subject: [PATCH 03/18] Support epoch --- .../expressions/datetimeExpressions.scala | 19 +++++++ .../sql/catalyst/parser/AstBuilder.scala | 2 + .../sql/catalyst/util/DateTimeUtils.scala | 5 ++ .../expressions/DateExpressionsSuite.scala | 14 +++++- .../resources/sql-tests/inputs/pgSQL/date.sql | 6 +-- .../sql-tests/results/pgSQL/date.sql.out | 50 ++++++++++++------- 6 files changed, 75 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index dbbc648658559..597a3d32db1d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1848,3 +1848,22 @@ case class MakeTimestamp( override def prettyName: String = "make_timestamp" } + +case class Epoch(child: Expression, timeZoneId: Option[String] = None) + extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression { + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) + override def dataType: DataType = DoubleType + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + override protected def nullSafeEval(timestamp: Any): Any = { + DateTimeUtils.getEpoch(timestamp.asInstanceOf[Long], zoneId) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) + defineCodeGen(ctx, ev, c => s"$dtu.getEpoch($c, $zid)") + } +} diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 713e87f166987..c8bc9e796e66e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1424,6 +1424,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging Milliseconds(expression(ctx.source)) case "MICROSECONDS" => Microseconds(expression(ctx.source)) + case "EPOCH" => + Epoch(expression(ctx.source)) case other => throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 0a81a9c441c26..ed345a50fd508 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -794,4 +794,9 @@ object DateTimeUtils { def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = { convertTz(time, getTimeZone(timeZone), TimeZoneGMT) } + + def getEpoch(time: SQLTimestamp, zoneId: ZoneId): Double = { + val offset = zoneId.getRules.getOffset(microsToInstant(time)) + time.toDouble / MICROS_PER_SECOND + offset.getTotalSeconds + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 30e10c5527ad6..541d50ff25060 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.time.{ZoneId, ZoneOffset} +import java.time.{LocalDateTime, ZoneId, ZoneOffset} import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit import java.util.concurrent.TimeUnit._ @@ -960,4 +960,16 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-07-01 00:00:00")) checkEvaluation(makeTimestampExpr.copy(sec = Literal(60.5)), null) } + + test("epoch") { + val zoneId = ZoneId.systemDefault() + val secFractions = 0.123456 + val timestamp = Epoch(MakeTimestamp( + Literal(2019), Literal(8), Literal(9), Literal(0), Literal(0), Literal(secFractions), + Some(Literal(zoneId.getId)))) + val instant = LocalDateTime.of(2019, 8, 9, 0, 0, 0, 123456) + .atZone(zoneId).toInstant + val expected = instant.getEpochSecond + secFractions + checkEvaluation(timestamp, expected) + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql index 6cd3856e8672e..4f4b3b5f28a6b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql @@ -228,9 +228,9 @@ SELECT f1 - date '2000-01-01' AS `Days From 2K` FROM DATE_TBL; -- test extract! 
-- -- epoch --- --- SELECT EXTRACT(EPOCH FROM DATE '1970-01-01'); -- 0 --- SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01'); -- 0 + +SELECT EXTRACT(EPOCH FROM DATE '1970-01-01'); +SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01'); -- SELECT EXTRACT(EPOCH FROM TIMESTAMPTZ '1970-01-01+00'); -- 0 -- -- century diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out index 46101ebce8113..3e80c6d6e932d 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 53 +-- Number of queries: 55 -- !query 0 @@ -502,48 +502,64 @@ struct -- !query 47 -select make_date(2013, 7, 15) +SELECT EXTRACT(EPOCH FROM DATE '1970-01-01') -- !query 47 schema -struct +struct -- !query 47 output -2013-07-15 +0.0 -- !query 48 -select make_date(-44, 3, 15) +SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01') -- !query 48 schema -struct +struct -- !query 48 output --0044-03-15 +0.0 -- !query 49 -select make_date(2013, 2, 30) +select make_date(2013, 7, 15) -- !query 49 schema -struct +struct -- !query 49 output -NULL +2013-07-15 -- !query 50 -select make_date(2013, 13, 1) +select make_date(-44, 3, 15) -- !query 50 schema -struct +struct -- !query 50 output -NULL +-0044-03-15 -- !query 51 -select make_date(2013, 11, -1) +select make_date(2013, 2, 30) -- !query 51 schema -struct +struct -- !query 51 output NULL -- !query 52 -DROP TABLE DATE_TBL +select make_date(2013, 13, 1) -- !query 52 schema -struct<> +struct -- !query 52 output +NULL + + +-- !query 53 +select make_date(2013, 11, -1) +-- !query 53 schema +struct +-- !query 53 output +NULL + + +-- !query 54 +DROP TABLE DATE_TBL +-- !query 54 schema +struct<> +-- !query 54 output From 01862743e1c518cf6cdd0734a6f86fab9171bf2f Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 10 Aug 2019 00:37:44 +0500 Subject: [PATCH 04/18] Switch to decimal instead of double --- .../catalyst/expressions/datetimeExpressions.scala | 2 +- .../spark/sql/catalyst/util/DateTimeUtils.scala | 8 ++++++-- .../test/resources/sql-tests/inputs/extract.sql | 2 ++ .../resources/sql-tests/results/extract.sql.out | 14 +++++++++++--- .../resources/sql-tests/results/pgSQL/date.sql.out | 8 ++++---- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 597a3d32db1d7..af585b09aaf8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1853,7 +1853,7 @@ case class Epoch(child: Expression, timeZoneId: Option[String] = None) extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression { override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) - override def dataType: DataType = DoubleType + override def dataType: DataType = DecimalType(20, 6) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 
ed345a50fd508..585e1c5c6455d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -795,8 +795,12 @@ object DateTimeUtils { convertTz(time, getTimeZone(timeZone), TimeZoneGMT) } - def getEpoch(time: SQLTimestamp, zoneId: ZoneId): Double = { + /** + * Returns the number of seconds with fractional part in microsecond precision + * since 1970-01-01 00:00:00 local time. + */ + def getEpoch(time: SQLTimestamp, zoneId: ZoneId): BigDecimal = { val offset = zoneId.getRules.getOffset(microsToInstant(time)) - time.toDouble / MICROS_PER_SECOND + offset.getTotalSeconds + BigDecimal(time) / MICROS_PER_SECOND + offset.getTotalSeconds } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql index 02e9ea497378f..23953b0d7675e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/extract.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql @@ -28,4 +28,6 @@ select extract(milliseconds from c) from t; select extract(microseconds from c) from t; +select extract(epoch from c) from t; + select extract(not_supported from c) from t; diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index c7d74161742c1..c921d3e2c22a9 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 +-- Number of queries: 17 -- !query 0 @@ -123,10 +123,18 @@ struct -- !query 15 -select extract(not_supported from c) from t +select extract(epoch from c) from t -- !query 15 schema -struct<> +struct -- !query 15 output +1304665689.123456 + + +-- !query 16 +select extract(not_supported from c) from t +-- !query 16 schema +struct<> +-- !query 16 output org.apache.spark.sql.catalyst.parser.ParseException Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7) diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out index 3e80c6d6e932d..72f0980db95a3 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out @@ -504,17 +504,17 @@ struct -- !query 47 SELECT EXTRACT(EPOCH FROM DATE '1970-01-01') -- !query 47 schema -struct +struct -- !query 47 output -0.0 +0 -- !query 48 SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01') -- !query 48 schema -struct +struct -- !query 48 output -0.0 +0 -- !query 49 From 7bbfe1f8aa4789e239d1ac9e1ff2677998e02e76 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 10 Aug 2019 00:59:37 +0500 Subject: [PATCH 05/18] Support isoyear at extract --- .../expressions/datetimeExpressions.scala | 16 ++++ .../sql/catalyst/parser/AstBuilder.scala | 2 + .../sql/catalyst/util/DateTimeUtils.scala | 8 ++ .../expressions/DateExpressionsSuite.scala | 5 + .../resources/sql-tests/inputs/extract.sql | 2 + .../sql-tests/results/extract.sql.out | 94 ++++++++++--------- 6 files changed, 84 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index af585b09aaf8e..b4bf085e481ed 100644 
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -388,6 +388,22 @@ case class Year(child: Expression) extends UnaryExpression with ImplicitCastInpu } } +case class IsoYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + override protected def nullSafeEval(date: Any): Any = { + DateTimeUtils.getIsoYear(date.asInstanceOf[Int]) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, c => s"$dtu.getIsoYear($c)") + } +} + @ExpressionDescription( usage = "_FUNC_(date) - Returns the quarter of the year for date, in the range 1 to 4.", examples = """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c8bc9e796e66e..f647bcb82b43f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1398,6 +1398,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging ctx.field.getText.toUpperCase(Locale.ROOT) match { case "YEAR" => Year(expression(ctx.source)) + case "ISOYEAR" => + IsoYear(expression(ctx.source)) case "QUARTER" => Quarter(expression(ctx.source)) case "MONTH" => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 585e1c5c6455d..9d22813450e9a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -487,6 +487,14 @@ object DateTimeUtils { LocalDate.ofEpochDay(date).getYear } + /** + * Returns the year which conforms to ISO 8601. Each ISO 8601 week-numbering + * year begins with the Monday of the week containing the 4th of January. + */ + def getIsoYear(date: SQLDate): Int = { + daysToLocalDate(date).get(IsoFields.WEEK_BASED_YEAR) + } + /** * Returns the quarter for the given date. The date is expressed in days * since 1.1.1970. 
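
The rule documented on getIsoYear can be checked directly against java.time, which backs the implementation via IsoFields.WEEK_BASED_YEAR. A small sketch (the object name is illustrative) reproducing the boundary exercised by the test below:

import java.time.LocalDate
import java.time.temporal.IsoFields

object IsoYearSketch {
  def main(args: Array[String]): Unit = {
    // 2006-01-01 is a Sunday, so it falls in the last ISO week of 2005;
    // ISO year 2006 begins on Monday 2006-01-02, the week containing January 4th.
    println(LocalDate.of(2006, 1, 1).get(IsoFields.WEEK_BASED_YEAR)) // 2005
    println(LocalDate.of(2006, 1, 2).get(IsoFields.WEEK_BASED_YEAR)) // 2006
  }
}
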
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 541d50ff25060..df7583e41ece0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -972,4 +972,9 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val expected = instant.getEpochSecond + secFractions checkEvaluation(timestamp, expected) } + + test("ISO 8601 week-numbering year") { + checkEvaluation(IsoYear(MakeDate(Literal(2006), Literal(1), Literal(1))), 2005) + checkEvaluation(IsoYear(MakeDate(Literal(2006), Literal(1), Literal(2))), 2006) + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql index 23953b0d7675e..a0cac484bb431 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/extract.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql @@ -2,6 +2,8 @@ CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c; select extract(year from c) from t; +select extract(isoyear from c) from t; + select extract(quarter from c) from t; select extract(month from c) from t; diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index c921d3e2c22a9..87b1782c4160e 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 17 +-- Number of queries: 18 -- !query 0 @@ -19,122 +19,130 @@ struct -- !query 2 -select extract(quarter from c) from t +select extract(isoyear from c) from t -- !query 2 schema -struct +struct -- !query 2 output -2 +2011 -- !query 3 -select extract(month from c) from t +select extract(quarter from c) from t -- !query 3 schema -struct +struct -- !query 3 output -5 +2 -- !query 4 -select extract(week from c) from t +select extract(month from c) from t -- !query 4 schema -struct +struct -- !query 4 output -18 +5 -- !query 5 -select extract(day from c) from t +select extract(week from c) from t -- !query 5 schema -struct +struct -- !query 5 output -6 +18 -- !query 6 -select extract(dayofweek from c) from t +select extract(day from c) from t -- !query 6 schema -struct +struct -- !query 6 output 6 -- !query 7 -select extract(dow from c) from t +select extract(dayofweek from c) from t -- !query 7 schema -struct<(dayofweek(CAST(c AS DATE)) - 1):int> +struct -- !query 7 output -5 +6 -- !query 8 -select extract(isodow from c) from t +select extract(dow from c) from t -- !query 8 schema -struct<(weekday(CAST(c AS DATE)) + 1):int> +struct<(dayofweek(CAST(c AS DATE)) - 1):int> -- !query 8 output 5 -- !query 9 -select extract(doy from c) from t +select extract(isodow from c) from t -- !query 9 schema -struct +struct<(weekday(CAST(c AS DATE)) + 1):int> -- !query 9 output -126 +5 -- !query 10 -select extract(hour from c) from t +select extract(doy from c) from t -- !query 10 schema -struct +struct -- !query 10 output -7 +126 -- !query 11 -select extract(minute from c) from t +select extract(hour from c) from t -- !query 11 schema -struct +struct -- !query 11 output -8 +7 -- !query 12 -select extract(second from c) from t +select 
extract(minute from c) from t -- !query 12 schema -struct +struct -- !query 12 output -9 +8 -- !query 13 -select extract(milliseconds from c) from t +select extract(second from c) from t -- !query 13 schema -struct +struct -- !query 13 output -9123 +9 -- !query 14 -select extract(microseconds from c) from t +select extract(milliseconds from c) from t -- !query 14 schema -struct +struct -- !query 14 output -9123456 +9123 -- !query 15 -select extract(epoch from c) from t +select extract(microseconds from c) from t -- !query 15 schema -struct +struct -- !query 15 output -1304665689.123456 +9123456 -- !query 16 -select extract(not_supported from c) from t +select extract(epoch from c) from t -- !query 16 schema -struct<> +struct -- !query 16 output +1304665689.123456 + + +-- !query 17 +select extract(not_supported from c) from t +-- !query 17 schema +struct<> +-- !query 17 output org.apache.spark.sql.catalyst.parser.ParseException Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7) From 90ebddbaf7d4e3e12b63826f722bf6cc6ed769f7 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 10 Aug 2019 17:50:30 +0500 Subject: [PATCH 06/18] Fix test for getEpoch --- .../spark/sql/catalyst/util/DateTimeUtils.scala | 8 +++++--- .../expressions/DateExpressionsSuite.scala | 14 ++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 9d22813450e9a..4a045ce4c34c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -25,6 +25,7 @@ import java.util.concurrent.TimeUnit._ import scala.util.control.NonFatal +import org.apache.spark.sql.types.Decimal import org.apache.spark.unsafe.types.UTF8String /** @@ -807,8 +808,9 @@ object DateTimeUtils { * Returns the number of seconds with fractional part in microsecond precision * since 1970-01-01 00:00:00 local time. 
*/ - def getEpoch(time: SQLTimestamp, zoneId: ZoneId): BigDecimal = { - val offset = zoneId.getRules.getOffset(microsToInstant(time)) - BigDecimal(time) / MICROS_PER_SECOND + offset.getTotalSeconds + def getEpoch(timestamp: SQLTimestamp, zoneId: ZoneId): Decimal = { + val offset = zoneId.getRules.getOffset(microsToInstant(timestamp)).getTotalSeconds + val sinceEpoch = BigDecimal(timestamp) / MICROS_PER_SECOND + offset + new Decimal().set(sinceEpoch, 20, 6) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index df7583e41ece0..a655d3aa88b8c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.time.{LocalDateTime, ZoneId, ZoneOffset} +import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset} import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit import java.util.concurrent.TimeUnit._ @@ -963,13 +963,15 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("epoch") { val zoneId = ZoneId.systemDefault() - val secFractions = 0.123456 + val nanos = 123456000 val timestamp = Epoch(MakeTimestamp( - Literal(2019), Literal(8), Literal(9), Literal(0), Literal(0), Literal(secFractions), - Some(Literal(zoneId.getId)))) - val instant = LocalDateTime.of(2019, 8, 9, 0, 0, 0, 123456) + Literal(2019), Literal(8), Literal(9), Literal(0), Literal(0), + Literal(nanos / DateTimeUtils.NANOS_PER_SECOND.toDouble), Some(Literal(zoneId.getId)))) + val instant = LocalDateTime.of(2019, 8, 9, 0, 0, 0, nanos) .atZone(zoneId).toInstant - val expected = instant.getEpochSecond + secFractions + val expected = Decimal(BigDecimal(nanos) / DateTimeUtils.NANOS_PER_SECOND + + instant.getEpochSecond + + zoneId.getRules.getOffset(instant).getTotalSeconds) checkEvaluation(timestamp, expected) } From 296250ecaf24cef2333b8fb58bda27e3710db4ff Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 10 Aug 2019 17:51:44 +0500 Subject: [PATCH 07/18] Remove unused import --- .../spark/sql/catalyst/expressions/DateExpressionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index a655d3aa88b8c..10eabc2bd5b73 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset} +import java.time.{LocalDateTime, ZoneId, ZoneOffset} import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit import java.util.concurrent.TimeUnit._ From b97aadbe51bad43911b4276ebbb906e7ca7a28cc Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 11 Aug 2019 00:04:40 +0500 Subject: [PATCH 08/18] Tests for microseconds and milliseconds --- 
.../expressions/DateExpressionsSuite.scala | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 10eabc2bd5b73..5eba243cb4ff1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -961,6 +961,25 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(makeTimestampExpr.copy(sec = Literal(60.5)), null) } + test("milliseconds and microseconds") { + outstandingTimezonesIds.foreach { timezone => + val timestamp = MakeTimestamp(Literal(2019), Literal(8), Literal(10), + Literal(0), Literal(0), Literal(10.123456789), Some(Literal(timezone))) + + checkEvaluation(Milliseconds(timestamp), 10123) + checkEvaluation(Microseconds(timestamp), 10123456) + + checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(0.0))), 0) + checkEvaluation(Microseconds(timestamp.copy(sec = Literal(0.0))), 0) + + checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(59.999))), 59999) + checkEvaluation(Microseconds(timestamp.copy(sec = Literal(59.999999))), 59999999) + + checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(60.0))), 0) + checkEvaluation(Microseconds(timestamp.copy(sec = Literal(60.0))), 0) + } + } + test("epoch") { val zoneId = ZoneId.systemDefault() val nanos = 123456000 From d16a6ed96b4d85f2e94a9ea10f2198a5b299709e Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 11 Aug 2019 18:52:48 +0500 Subject: [PATCH 09/18] Re-gen results for date.sql --- .../sql-tests/results/pgSQL/date.sql.out | 266 ++++++++++-------- 1 file changed, 141 insertions(+), 125 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out index f5586c5a4aa3a..cb2be6d1cd22d 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 89 +-- Number of queries: 91 -- !query 0 @@ -502,336 +502,352 @@ struct -- !query 47 -SELECT EXTRACT(CENTURY FROM TO_DATE('0101-12-31 BC', 'yyyy-MM-dd G')) +SELECT EXTRACT(EPOCH FROM DATE '1970-01-01') -- !query 47 schema -struct +struct -- !query 47 output --2 +0 -- !query 48 -SELECT EXTRACT(CENTURY FROM TO_DATE('0100-12-31 BC', 'yyyy-MM-dd G')) +SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01') -- !query 48 schema -struct +struct -- !query 48 output --1 +0 -- !query 49 -SELECT EXTRACT(CENTURY FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) +SELECT EXTRACT(CENTURY FROM TO_DATE('0101-12-31 BC', 'yyyy-MM-dd G')) -- !query 49 schema -struct +struct -- !query 49 output --1 +-2 -- !query 50 -SELECT EXTRACT(CENTURY FROM DATE '0001-01-01') +SELECT EXTRACT(CENTURY FROM TO_DATE('0100-12-31 BC', 'yyyy-MM-dd G')) -- !query 50 schema -struct +struct -- !query 50 output -1 +-1 -- !query 51 -SELECT EXTRACT(CENTURY FROM DATE '0001-01-01 AD') +SELECT EXTRACT(CENTURY FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) -- !query 51 schema -struct +struct -- !query 51 output -1 +-1 -- !query 52 -SELECT EXTRACT(CENTURY FROM DATE '1900-12-31') +SELECT EXTRACT(CENTURY FROM DATE '0001-01-01') -- !query 52 schema -struct +struct -- !query 52 
output -19 +1 -- !query 53 -SELECT EXTRACT(CENTURY FROM DATE '1901-01-01') +SELECT EXTRACT(CENTURY FROM DATE '0001-01-01 AD') -- !query 53 schema -struct +struct -- !query 53 output -20 +1 -- !query 54 -SELECT EXTRACT(CENTURY FROM DATE '2000-12-31') +SELECT EXTRACT(CENTURY FROM DATE '1900-12-31') -- !query 54 schema -struct +struct -- !query 54 output -20 +19 -- !query 55 -SELECT EXTRACT(CENTURY FROM DATE '2001-01-01') +SELECT EXTRACT(CENTURY FROM DATE '1901-01-01') -- !query 55 schema -struct +struct -- !query 55 output -21 +20 -- !query 56 -SELECT EXTRACT(CENTURY FROM CURRENT_DATE)>=21 AS True +SELECT EXTRACT(CENTURY FROM DATE '2000-12-31') -- !query 56 schema -struct +struct -- !query 56 output -true +20 -- !query 57 -SELECT EXTRACT(MILLENNIUM FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) +SELECT EXTRACT(CENTURY FROM DATE '2001-01-01') -- !query 57 schema -struct +struct -- !query 57 output --1 +21 -- !query 58 -SELECT EXTRACT(MILLENNIUM FROM DATE '0001-01-01 AD') +SELECT EXTRACT(CENTURY FROM CURRENT_DATE)>=21 AS True -- !query 58 schema -struct +struct -- !query 58 output -1 +true -- !query 59 -SELECT EXTRACT(MILLENNIUM FROM DATE '1000-12-31') +SELECT EXTRACT(MILLENNIUM FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) -- !query 59 schema -struct +struct -- !query 59 output -1 +-1 -- !query 60 -SELECT EXTRACT(MILLENNIUM FROM DATE '1001-01-01') +SELECT EXTRACT(MILLENNIUM FROM DATE '0001-01-01 AD') -- !query 60 schema -struct +struct -- !query 60 output -2 +1 -- !query 61 -SELECT EXTRACT(MILLENNIUM FROM DATE '2000-12-31') +SELECT EXTRACT(MILLENNIUM FROM DATE '1000-12-31') -- !query 61 schema -struct +struct -- !query 61 output -2 +1 -- !query 62 -SELECT EXTRACT(MILLENNIUM FROM DATE '2001-01-01') +SELECT EXTRACT(MILLENNIUM FROM DATE '1001-01-01') -- !query 62 schema -struct +struct -- !query 62 output -3 +2 -- !query 63 -SELECT EXTRACT(MILLENNIUM FROM CURRENT_DATE) +SELECT EXTRACT(MILLENNIUM FROM DATE '2000-12-31') -- !query 63 schema -struct +struct -- !query 63 output -3 +2 -- !query 64 -SELECT EXTRACT(DECADE FROM DATE '1994-12-25') +SELECT EXTRACT(MILLENNIUM FROM DATE '2001-01-01') -- !query 64 schema -struct +struct -- !query 64 output -199 +3 -- !query 65 -SELECT EXTRACT(DECADE FROM DATE '0010-01-01') +SELECT EXTRACT(MILLENNIUM FROM CURRENT_DATE) -- !query 65 schema -struct +struct -- !query 65 output -1 +3 -- !query 66 -SELECT EXTRACT(DECADE FROM DATE '0009-12-31') +SELECT EXTRACT(DECADE FROM DATE '1994-12-25') -- !query 66 schema -struct +struct -- !query 66 output -0 +199 -- !query 67 -SELECT EXTRACT(DECADE FROM TO_DATE('0001-01-01 BC', 'yyyy-MM-dd G')) +SELECT EXTRACT(DECADE FROM DATE '0010-01-01') -- !query 67 schema -struct +struct -- !query 67 output -0 +1 -- !query 68 -SELECT EXTRACT(DECADE FROM TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')) +SELECT EXTRACT(DECADE FROM DATE '0009-12-31') -- !query 68 schema -struct +struct -- !query 68 output --1 +0 -- !query 69 -SELECT EXTRACT(DECADE FROM TO_DATE('0011-01-01 BC', 'yyyy-MM-dd G')) +SELECT EXTRACT(DECADE FROM TO_DATE('0001-01-01 BC', 'yyyy-MM-dd G')) -- !query 69 schema -struct +struct -- !query 69 output --1 +0 -- !query 70 -SELECT EXTRACT(DECADE FROM TO_DATE('0012-12-31 BC', 'yyyy-MM-dd G')) +SELECT EXTRACT(DECADE FROM TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')) -- !query 70 schema -struct +struct -- !query 70 output --2 +-1 -- !query 71 -SELECT EXTRACT(CENTURY FROM NOW())>=21 AS True +SELECT EXTRACT(DECADE FROM TO_DATE('0011-01-01 BC', 'yyyy-MM-dd G')) -- !query 71 schema -struct +struct -- !query 71 output -true +-1 -- !query 72 
-SELECT EXTRACT(CENTURY FROM TIMESTAMP '1970-03-20 04:30:00.00000') +SELECT EXTRACT(DECADE FROM TO_DATE('0012-12-31 BC', 'yyyy-MM-dd G')) -- !query 72 schema -struct +struct -- !query 72 output -20 +-2 -- !query 73 -SELECT DATE_TRUNC('MILLENNIUM', TIMESTAMP '1970-03-20 04:30:00.00000') +SELECT EXTRACT(CENTURY FROM NOW())>=21 AS True -- !query 73 schema -struct +struct -- !query 73 output -1001-01-01 00:07:02 +true -- !query 74 -SELECT DATE_TRUNC('MILLENNIUM', DATE '1970-03-20') +SELECT EXTRACT(CENTURY FROM TIMESTAMP '1970-03-20 04:30:00.00000') -- !query 74 schema -struct +struct -- !query 74 output -1001-01-01 00:07:02 +20 -- !query 75 -SELECT DATE_TRUNC('CENTURY', TIMESTAMP '1970-03-20 04:30:00.00000') +SELECT DATE_TRUNC('MILLENNIUM', TIMESTAMP '1970-03-20 04:30:00.00000') -- !query 75 schema -struct +struct -- !query 75 output -1901-01-01 00:00:00 +1001-01-01 00:07:02 -- !query 76 -SELECT DATE_TRUNC('CENTURY', DATE '1970-03-20') +SELECT DATE_TRUNC('MILLENNIUM', DATE '1970-03-20') -- !query 76 schema -struct +struct -- !query 76 output -1901-01-01 00:00:00 +1001-01-01 00:07:02 -- !query 77 -SELECT DATE_TRUNC('CENTURY', DATE '2004-08-10') +SELECT DATE_TRUNC('CENTURY', TIMESTAMP '1970-03-20 04:30:00.00000') -- !query 77 schema -struct +struct -- !query 77 output -2001-01-01 00:00:00 +1901-01-01 00:00:00 -- !query 78 -SELECT DATE_TRUNC('CENTURY', DATE '0002-02-04') +SELECT DATE_TRUNC('CENTURY', DATE '1970-03-20') -- !query 78 schema -struct +struct -- !query 78 output -0001-01-01 00:07:02 +1901-01-01 00:00:00 -- !query 79 -SELECT DATE_TRUNC('CENTURY', TO_DATE('0055-08-10 BC', 'yyyy-MM-dd G')) +SELECT DATE_TRUNC('CENTURY', DATE '2004-08-10') -- !query 79 schema -struct +struct -- !query 79 output --0099-01-01 00:07:02 +2001-01-01 00:00:00 -- !query 80 -SELECT DATE_TRUNC('DECADE', DATE '1993-12-25') +SELECT DATE_TRUNC('CENTURY', DATE '0002-02-04') -- !query 80 schema -struct +struct -- !query 80 output -1990-01-01 00:00:00 +0001-01-01 00:07:02 -- !query 81 -SELECT DATE_TRUNC('DECADE', DATE '0004-12-25') +SELECT DATE_TRUNC('CENTURY', TO_DATE('0055-08-10 BC', 'yyyy-MM-dd G')) -- !query 81 schema -struct +struct -- !query 81 output -0000-01-01 00:07:02 +-0099-01-01 00:07:02 -- !query 82 -SELECT DATE_TRUNC('DECADE', TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')) +SELECT DATE_TRUNC('DECADE', DATE '1993-12-25') -- !query 82 schema -struct +struct -- !query 82 output --0010-01-01 00:07:02 +1990-01-01 00:00:00 -- !query 83 -select make_date(2013, 7, 15) +SELECT DATE_TRUNC('DECADE', DATE '0004-12-25') -- !query 83 schema -struct +struct -- !query 83 output -2013-07-15 +0000-01-01 00:07:02 -- !query 84 -select make_date(-44, 3, 15) +SELECT DATE_TRUNC('DECADE', TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')) -- !query 84 schema -struct +struct -- !query 84 output --0044-03-15 +-0010-01-01 00:07:02 -- !query 85 -select make_date(2013, 2, 30) +select make_date(2013, 7, 15) -- !query 85 schema -struct +struct -- !query 85 output -NULL +2013-07-15 -- !query 86 -select make_date(2013, 13, 1) +select make_date(-44, 3, 15) -- !query 86 schema -struct +struct -- !query 86 output -NULL +-0044-03-15 -- !query 87 -select make_date(2013, 11, -1) +select make_date(2013, 2, 30) -- !query 87 schema -struct +struct -- !query 87 output NULL -- !query 88 -DROP TABLE DATE_TBL +select make_date(2013, 13, 1) -- !query 88 schema -struct<> +struct -- !query 88 output +NULL + + +-- !query 89 +select make_date(2013, 11, -1) +-- !query 89 schema +struct +-- !query 89 output +NULL + + +-- !query 90 +DROP TABLE DATE_TBL +-- !query 90 schema 
+struct<> +-- !query 90 output From 981cd7338bbb228fce1a2f9cdbb96fa02ab25258 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 11 Aug 2019 23:02:07 +0500 Subject: [PATCH 10/18] Revert comment in date.sql --- sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql index 213609fa73a03..5f492428ad3b3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql @@ -228,7 +228,7 @@ SELECT f1 - date '2000-01-01' AS `Days From 2K` FROM DATE_TBL; -- test extract! -- -- epoch - +-- SELECT EXTRACT(EPOCH FROM DATE '1970-01-01'); SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01'); -- SELECT EXTRACT(EPOCH FROM TIMESTAMPTZ '1970-01-01+00'); -- 0 From f152004a2551bc0e1ffb7f82f3daf4051bd09292 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 11 Aug 2019 23:03:58 +0500 Subject: [PATCH 11/18] 1 000 -> 1000, and 1 000 000 -> 1000000 --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index ac7f661d713ee..39161aba4a324 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -457,7 +457,7 @@ object DateTimeUtils { } /** - * Returns seconds, including fractional parts, multiplied by 1 000. The timestamp + * Returns seconds, including fractional parts, multiplied by 1000. The timestamp * is expressed in microseconds since the epoch. */ def getMilliseconds(timestamp: SQLTimestamp, timeZone: TimeZone): Int = { @@ -465,7 +465,7 @@ object DateTimeUtils { } /** - * Returns seconds, including fractional parts, multiplied by 1 000 000. The timestamp + * Returns seconds, including fractional parts, multiplied by 1000000. The timestamp * is expressed in microseconds since the epoch. 
   */
  def getMicroseconds(timestamp: SQLTimestamp, timeZone: TimeZone): Int = {

From f025e6e38cca2f3e3f4522715e27ad7620fdd150 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Mon, 12 Aug 2019 11:47:26 +0500
Subject: [PATCH 12/18] Revert comments in date.sql

---
 sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql
index 5f492428ad3b3..8d7c218d72e49 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql
@@ -229,8 +229,8 @@ SELECT f1 - date '2000-01-01' AS `Days From 2K` FROM DATE_TBL;
 --
 -- epoch
 --
-SELECT EXTRACT(EPOCH FROM DATE '1970-01-01');
-SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01');
+SELECT EXTRACT(EPOCH FROM DATE '1970-01-01'); -- 0
+SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01'); -- 0
 -- SELECT EXTRACT(EPOCH FROM TIMESTAMPTZ '1970-01-01+00'); -- 0
 --
 -- century

From c0dea7b4fb2a60a7deadd8d4a5a06cb93a0525e8 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Mon, 12 Aug 2019 12:09:38 +0500
Subject: [PATCH 13/18] Add a comment about type of epoch

---
 .../spark/sql/catalyst/expressions/datetimeExpressions.scala | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 9011f1ab0f98e..6493d8b650059 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1935,6 +1935,10 @@ case class Epoch(child: Expression, timeZoneId: Option[String] = None)
   extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression {

   override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
+  // DecimalType is used to not lose precision while converting microseconds to
+  // the fractional part of seconds. Scale 6 is taken to keep all microseconds in
+  // the fraction. The precision 20 covers the whole valid range of years [1, 9999],
+  // plus negative years, which can be used in some cases though they are not officially supported.
override def dataType: DataType = DecimalType(20, 6) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) From 3c848e747eb9f6efc04954311402b1400d872be2 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 12 Aug 2019 23:24:22 +0500 Subject: [PATCH 14/18] Use DECIMAL for milliseconds --- .../sql/catalyst/expressions/datetimeExpressions.scala | 6 +++++- .../apache/spark/sql/catalyst/util/DateTimeUtils.scala | 5 +++-- .../sql/catalyst/expressions/DateExpressionsSuite.scala | 9 +++++---- .../src/test/resources/sql-tests/results/extract.sql.out | 4 ++-- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 6493d8b650059..64e1a8c412989 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -306,7 +306,11 @@ case class Milliseconds(child: Expression, timeZoneId: Option[String] = None) extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression { override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) - override def dataType: DataType = IntegerType + // DecimalType is used here to not lose precision while converting microseconds to + // the fractional part of milliseconds. Scale 3 is taken to have all microseconds as + // the fraction. The precision 8 should cover 2 digits for seconds, 3 digits for + // milliseconds and 3 digits for microseconds. + override def dataType: DataType = DecimalType(8, 3) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 39161aba4a324..65a9bee5eaedd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -460,8 +460,9 @@ object DateTimeUtils { * Returns seconds, including fractional parts, multiplied by 1000. The timestamp * is expressed in microseconds since the epoch. 
   */
-  def getMilliseconds(timestamp: SQLTimestamp, timeZone: TimeZone): Int = {
-    getMicroseconds(timestamp, timeZone) / MICROS_PER_MILLIS.toInt
+  def getMilliseconds(timestamp: SQLTimestamp, timeZone: TimeZone): Decimal = {
+    val micros = Decimal(getMicroseconds(timestamp, timeZone))
+    (micros / Decimal(MICROS_PER_MILLIS)).toPrecision(8, 3)
   }
 
   /**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 5b61a661703ee..62f1bcb61d4ce 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -1012,16 +1012,17 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       val timestamp = MakeTimestamp(Literal(2019), Literal(8), Literal(10),
         Literal(0), Literal(0), Literal(10.123456789), Some(Literal(timezone)))
 
-      checkEvaluation(Milliseconds(timestamp), 10123)
+      checkEvaluation(Milliseconds(timestamp), Decimal(BigDecimal(10123.456)))
       checkEvaluation(Microseconds(timestamp), 10123456)
 
-      checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(0.0))), 0)
+      checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(0.0))), Decimal(0, 8, 3))
       checkEvaluation(Microseconds(timestamp.copy(sec = Literal(0.0))), 0)
 
-      checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(59.999))), 59999)
+      checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(59.999999))),
+        Decimal(BigDecimal(59999.999), 8, 3))
       checkEvaluation(Microseconds(timestamp.copy(sec = Literal(59.999999))), 59999999)
 
-      checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(60.0))), 0)
+      checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(60.0))), Decimal(0, 8, 3))
       checkEvaluation(Microseconds(timestamp.copy(sec = Literal(60.0))), 0)
     }
   }
diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
index 87b1782c4160e..e4cad1c51f31f 100644
--- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
@@ -117,9 +117,9 @@ struct
 -- !query 14
 select extract(milliseconds from c) from t
 -- !query 14 schema
-struct
+struct
 -- !query 14 output
-9123
+9123.456
 
 
 -- !query 15

From caa54893652a507636b29660ea32985dd1113de1 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Tue, 13 Aug 2019 11:16:47 +0500
Subject: [PATCH 15/18] Support usec and msec

---
 .../sql/catalyst/parser/AstBuilder.scala      |  4 +--
 .../resources/sql-tests/inputs/extract.sql    |  2 ++
 .../sql-tests/results/extract.sql.out         | 34 ++++++++++++++-----
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 43d7ddb625c61..d3d73374447b6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1428,9 +1428,9 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
         Minute(expression(ctx.source))
       case "SECOND" =>
         Second(expression(ctx.source))
-      case "MILLISECONDS" =>
+      case "MILLISECONDS" | "MSEC" =>
         Milliseconds(expression(ctx.source))
-      case "MICROSECONDS" =>
+      case "MICROSECONDS" | "USEC" =>
         Microseconds(expression(ctx.source))
       case "EPOCH" =>
         Epoch(expression(ctx.source))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql
index a0cac484bb431..2cab09ac218e9 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/extract.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql
@@ -27,8 +27,10 @@ select extract(minute from c) from t;
 select extract(second from c) from t;
 
 select extract(milliseconds from c) from t;
+select extract(msec from c) from t;
 
 select extract(microseconds from c) from t;
+select extract(usec from c) from t;
 
 select extract(epoch from c) from t;
diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
index e4cad1c51f31f..5d793a43f7e7d 100644
--- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 18
+-- Number of queries: 20
 
 
 -- !query 0
@@ -123,26 +123,42 @@ struct
 
 
 -- !query 15
-select extract(microseconds from c) from t
+select extract(msec from c) from t
 -- !query 15 schema
-struct
+struct
 -- !query 15 output
-9123456
+9123.456
 
 
 -- !query 16
-select extract(epoch from c) from t
+select extract(microseconds from c) from t
 -- !query 16 schema
-struct
+struct
 -- !query 16 output
-1304665689.123456
+9123456
 
 
 -- !query 17
-select extract(not_supported from c) from t
+select extract(usec from c) from t
 -- !query 17 schema
-struct<>
+struct
 -- !query 17 output
+9123456
+
+
+-- !query 18
+select extract(epoch from c) from t
+-- !query 18 schema
+struct
+-- !query 18 output
+1304665689.123456
+
+
+-- !query 19
+select extract(not_supported from c) from t
+-- !query 19 schema
+struct<>
+-- !query 19 output
 org.apache.spark.sql.catalyst.parser.ParseException
 Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7)
From c978b9b25a29ac5fa58e01a6e0ee028310b7d63c Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Tue, 13 Aug 2019 22:29:29 +0500
Subject: [PATCH 16/18] Add synonyms for microseconds and milliseconds

---
 .../sql/catalyst/parser/AstBuilder.scala      |  4 +-
 .../resources/sql-tests/inputs/extract.sql    |  8 ++
 .../sql-tests/results/extract.sql.out         | 88 ++++++++++++++++---
 3 files changed, 86 insertions(+), 14 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index d3d73374447b6..205f70d220f04 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1428,9 +1428,9 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
         Minute(expression(ctx.source))
       case "SECOND" =>
         Second(expression(ctx.source))
-      case "MILLISECONDS" | "MSEC" =>
+      case "MILLISECONDS" | "MSEC" | "MSECS" | "MILLISECON" | "MSECONDS" | "MS" =>
         Milliseconds(expression(ctx.source))
-      case "MICROSECONDS" | "USEC" =>
+      case "MICROSECONDS" | "USEC" | "USECS" | "USECONDS" | "MICROSECON" | "US" =>
         Microseconds(expression(ctx.source))
       case "EPOCH" =>
         Epoch(expression(ctx.source))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql
index 2cab09ac218e9..14a0398e63365 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/extract.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql
@@ -28,9 +28,17 @@ select extract(second from c) from t;
 
 select extract(milliseconds from c) from t;
 select extract(msec from c) from t;
+select extract(msecs from c) from t;
+select extract(millisecon from c) from t;
+select extract(mseconds from c) from t;
+select extract(ms from c) from t;
 
 select extract(microseconds from c) from t;
 select extract(usec from c) from t;
+select extract(usecs from c) from t;
+select extract(useconds from c) from t;
+select extract(microsecon from c) from t;
+select extract(us from c) from t;
 
 select extract(epoch from c) from t;
diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
index 5d793a43f7e7d..1f4a0feebd215 100644
--- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 20
+-- Number of queries: 28
 
 
 -- !query 0
@@ -131,34 +131,98 @@ struct
 
 
 -- !query 16
-select extract(microseconds from c) from t
+select extract(msecs from c) from t
 -- !query 16 schema
-struct
+struct
 -- !query 16 output
-9123456
+9123.456
 
 
 -- !query 17
-select extract(usec from c) from t
+select extract(millisecon from c) from t
 -- !query 17 schema
-struct
+struct
 -- !query 17 output
-9123456
+9123.456
 
 
 -- !query 18
-select extract(epoch from c) from t
+select extract(mseconds from c) from t
 -- !query 18 schema
-struct
+struct
 -- !query 18 output
-1304665689.123456
+9123.456
 
 
 -- !query 19
-select extract(not_supported from c) from t
+select extract(ms from c) from t
 -- !query 19 schema
-struct<>
+struct
 -- !query 19 output
+9123.456
+
+
+-- !query 20
+select extract(microseconds from c) from t
+-- !query 20 schema
+struct
+-- !query 20 output
+9123456
+
+
+-- !query 21
+select extract(usec from c) from t
+-- !query 21 schema
+struct
+-- !query 21 output
+9123456
+
+
+-- !query 22
+select extract(usecs from c) from t
+-- !query 22 schema
+struct
+-- !query 22 output
+9123456
+
+
+-- !query 23
+select extract(useconds from c) from t
+-- !query 23 schema
+struct
+-- !query 23 output
+9123456
+
+
+-- !query 24
+select extract(microsecon from c) from t
+-- !query 24 schema
+struct
+-- !query 24 output
+9123456
+
+
+-- !query 25
+select extract(us from c) from t
+-- !query 25 schema
+struct
+-- !query 25 output
+9123456
+
+
+-- !query 26
+select extract(epoch from c) from t
+-- !query 26 schema
+struct
+-- !query 26 output
+1304665689.123456
+
+
+-- !query 27
+select extract(not_supported from c) from t
+-- !query 27 schema
+struct<>
+-- !query 27 output
 org.apache.spark.sql.catalyst.parser.ParseException
 Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7)

From 7fcb6a42b0d3d113d06d6dc225b7eba3f4d7f6dd Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Wed, 14 Aug 2019 11:12:22 +0500
Subject: [PATCH 17/18] Re-gen extract.sql.out

---
 .../sql-tests/results/extract.sql.out         | 220 +++++++++++++-----
 1 file changed, 166 insertions(+), 54 deletions(-)

diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
index a7efe825c98e5..b02dfe054344b 100644
--- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 50
+-- Number of queries: 64
 
 
 -- !query 0
@@ -147,15 +147,15 @@ struct
 
 
 -- !query 18
-select extract(quarter from c) from t
+select extract(isoyear from c) from t
 -- !query 18 schema
-struct
+struct
 -- !query 18 output
-2
+2011
 
 
 -- !query 19
-select extract(qtr from c) from t
+select extract(quarter from c) from t
 -- !query 19 schema
 struct
 -- !query 19 output
 2
 
 
 -- !query 20
-select extract(month from c) from t
+select extract(qtr from c) from t
 -- !query 20 schema
-struct
+struct
 -- !query 20 output
-5
+2
 
 
 -- !query 21
-select extract(mon from c) from t
+select extract(month from c) from t
 -- !query 21 schema
 struct
 -- !query 21 output
 5
 
 
 -- !query 22
-select extract(mons from c) from t
+select extract(mon from c) from t
 -- !query 22 schema
 struct
 -- !query 22 output
 5
 
 
 -- !query 23
-select extract(months from c) from t
+select extract(mons from c) from t
 -- !query 23 schema
 struct
 -- !query 23 output
 5
 
 
 -- !query 24
-select extract(week from c) from t
+select extract(months from c) from t
 -- !query 24 schema
-struct
+struct
 -- !query 24 output
-18
+5
 
 
 -- !query 25
-select extract(w from c) from t
+select extract(week from c) from t
 -- !query 25 schema
 struct
 -- !query 25 output
 18
 
 
 -- !query 26
-select extract(weeks from c) from t
+select extract(w from c) from t
 -- !query 26 schema
 struct
 -- !query 26 output
 18
 
 
 -- !query 27
-select extract(day from c) from t
+select extract(weeks from c) from t
 -- !query 27 schema
-struct
+struct
 -- !query 27 output
-6
+18
 
 
 -- !query 28
-select extract(d from c) from t
+select extract(day from c) from t
 -- !query 28 schema
 struct
 -- !query 28 output
 6
 
 
 -- !query 29
-select extract(days from c) from t
+select extract(d from c) from t
 -- !query 29 schema
 struct
 -- !query 29 output
 6
 
 
 -- !query 30
-select extract(dayofweek from c) from t
+select extract(days from c) from t
 -- !query 30 schema
-struct
+struct
 -- !query 30 output
 6
 
 
 -- !query 31
-select extract(dow from c) from t
+select extract(dayofweek from c) from t
 -- !query 31 schema
-struct<(dayofweek(CAST(c AS DATE)) - 1):int>
+struct
 -- !query 31 output
-5
+6
 
 
 -- !query 32
-select extract(isodow from c) from t
+select extract(dow from c) from t
 -- !query 32 schema
-struct<(weekday(CAST(c AS DATE)) + 1):int>
+struct<(dayofweek(CAST(c AS DATE)) - 1):int>
 -- !query 32 output
 5
 
 
 -- !query 33
-select extract(doy from c) from t
+select extract(isodow from c) from t
 -- !query 33 schema
-struct
+struct<(weekday(CAST(c AS DATE)) + 1):int>
 -- !query 33 output
-126
+5
 
 
 -- !query 34
-select extract(hour from c) from t
+select extract(doy from c) from t
 -- !query 34 schema
-struct
+struct
 -- !query 34 output
-7
+126
 
 
 -- !query 35
-select extract(h from c) from t
+select extract(hour from c) from t
 -- !query 35 schema
 struct
 -- !query 35 output
 7
 
 
 -- !query 36
-select extract(hours from c) from t
+select extract(h from c) from t
 -- !query 36 schema
 struct
 -- !query 36 output
 7
 
 
 -- !query 37
-select extract(hr from c) from t
+select extract(hours from c) from t
 -- !query 37 schema
 struct
 -- !query 37 output
 7
 
 
 -- !query 38
-select extract(hrs from c) from t
+select extract(hr from c) from t
 -- !query 38 schema
 struct
 -- !query 38 output
 7
 
 
 -- !query 39
-select extract(minute from c) from t
+select extract(hrs from c) from t
 -- !query 39 schema
-struct
+struct
 -- !query 39 output
-8
+7
 
 
 -- !query 40
-select extract(m from c) from t
+select extract(minute from c) from t
 -- !query 40 schema
 struct
 -- !query 40 output
 8
 
 
 -- !query 41
-select extract(min from c) from t
+select extract(m from c) from t
 -- !query 41 schema
 struct
 -- !query 41 output
 8
 
 
 -- !query 42
-select extract(mins from c) from t
+select extract(min from c) from t
 -- !query 42 schema
 struct
 -- !query 42 output
 8
 
 
 -- !query 43
-select extract(minutes from c) from t
+select extract(mins from c) from t
 -- !query 43 schema
 struct
 -- !query 43 output
 8
 
 
 -- !query 44
-select extract(second from c) from t
+select extract(minutes from c) from t
 -- !query 44 schema
-struct
+struct
 -- !query 44 output
-9
+8
 
 
 -- !query 45
-select extract(s from c) from t
+select extract(second from c) from t
 -- !query 45 schema
 struct
 -- !query 45 output
 9
 
 
 -- !query 46
-select extract(sec from c) from t
+select extract(s from c) from t
 -- !query 46 schema
 struct
 -- !query 46 output
 9
 
 
 -- !query 47
-select extract(seconds from c) from t
+select extract(sec from c) from t
 -- !query 47 schema
 struct
 -- !query 47 output
 9
 
 
 -- !query 48
-select extract(secs from c) from t
+select extract(seconds from c) from t
 -- !query 48 schema
 struct
 -- !query 48 output
 9
 
 
 -- !query 49
-select extract(not_supported from c) from t
+select extract(secs from c) from t
 -- !query 49 schema
-struct<>
+struct
 -- !query 49 output
+9
+
+
+-- !query 50
+select extract(milliseconds from c) from t
+-- !query 50 schema
+struct
+-- !query 50 output
+9123.456
+
+
+-- !query 51
+select extract(msec from c) from t
+-- !query 51 schema
+struct
+-- !query 51 output
+9123.456
+
+
+-- !query 52
+select extract(msecs from c) from t
+-- !query 52 schema
+struct
+-- !query 52 output
+9123.456
+
+
+-- !query 53
+select extract(millisecon from c) from t
+-- !query 53 schema
+struct
+-- !query 53 output
+9123.456
+
+
+-- !query 54
+select extract(mseconds from c) from t
+-- !query 54 schema
+struct
+-- !query 54 output
+9123.456
+
+
+-- !query 55
+select extract(ms from c) from t
+-- !query 55 schema
+struct
+-- !query 55 output
+9123.456
+
+
+-- !query 56
+select extract(microseconds from c) from t
+-- !query 56 schema
+struct
+-- !query 56 output
+9123456
+
+
+-- !query 57
+select extract(usec from c) from t
+-- !query 57 schema
+struct
+-- !query 57 output
+9123456
+
+
+-- !query 58
+select extract(usecs from c) from t
+-- !query 58 schema
+struct
+-- !query 58 output
+9123456
+
+
+-- !query 59
+select extract(useconds from c) from t
+-- !query 59 schema
+struct
+-- !query 59 output
+9123456
+
+
+-- !query 60
+select extract(microsecon from c) from t
+-- !query 60 schema
+struct
+-- !query 60 output
+9123456
+
+
+-- !query 61
+select extract(us from c) from t
+-- !query 61 schema
+struct
+-- !query 61 output
+9123456
+
+
+-- !query 62
+select extract(epoch from c) from t
+-- !query 62 schema
+struct
+-- !query 62 output
+1304665689.123456
+
+
+-- !query 63
+select extract(not_supported from c) from t
+-- !query 63 schema
+struct<>
+-- !query 63 output
 org.apache.spark.sql.catalyst.parser.ParseException
 Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7)
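Taken together, patches 15 and 16 leave the parser accepting six spellings for each fractional field, and the regenerated golden file above exercises every alias once: all millisecond spellings return 9123.456 and all microsecond spellings return 9123456. The alias sets and the expression each one resolves to can be condensed into a small lookup (a hypothetical helper distilled from the AstBuilder match above; Spark's real dispatch remains the string match in the parser):

    // Which extract() field names map to which Catalyst expression.
    object ExtractAliasSketch {
      val millis: Set[String] =
        Set("MILLISECONDS", "MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS")
      val micros: Set[String] =
        Set("MICROSECONDS", "USEC", "USECS", "USECONDS", "MICROSECON", "US")

      // Returns the expression name for a field, if it is one of the
      // fields touched by this series.
      def resolve(field: String): Option[String] = {
        val f = field.toUpperCase
        if (millis(f)) Some("Milliseconds")
        else if (micros(f)) Some("Microseconds")
        else if (f == "EPOCH") Some("Epoch")
        else None
      }
    }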
From 153d2ff745385fcb50870d6eea939bed24da02ab Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Wed, 14 Aug 2019 11:48:41 +0500
Subject: [PATCH 18/18] Fix DateExpressionsSuite

---
 .../expressions/DateExpressionsSuite.scala    | 28 +++++++++++--------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index bf3775126d3b5..ae3549b4aaf5f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -1013,21 +1013,24 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
   test("milliseconds and microseconds") {
     outstandingTimezonesIds.foreach { timezone =>
-      val timestamp = MakeTimestamp(Literal(2019), Literal(8), Literal(10),
-        Literal(0), Literal(0), Literal(10.123456789), Some(Literal(timezone)))
+      var timestamp = MakeTimestamp(Literal(2019), Literal(8), Literal(10),
+        Literal(0), Literal(0), Literal(Decimal(BigDecimal(10.123456789), 8, 6)),
+        Some(Literal(timezone)))
 
-      checkEvaluation(Milliseconds(timestamp), Decimal(BigDecimal(10123.456)))
-      checkEvaluation(Microseconds(timestamp), 10123456)
+      checkEvaluation(Milliseconds(timestamp), Decimal(BigDecimal(10123.457), 8, 3))
+      checkEvaluation(Microseconds(timestamp), 10123457)
 
-      checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(0.0))), Decimal(0, 8, 3))
-      checkEvaluation(Microseconds(timestamp.copy(sec = Literal(0.0))), 0)
+      timestamp = timestamp.copy(sec = Literal(Decimal(0.0, 8, 6)))
+      checkEvaluation(Milliseconds(timestamp), Decimal(0, 8, 3))
+      checkEvaluation(Microseconds(timestamp), 0)
 
-      checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(59.999999))),
-        Decimal(BigDecimal(59999.999), 8, 3))
-      checkEvaluation(Microseconds(timestamp.copy(sec = Literal(59.999999))), 59999999)
+      timestamp = timestamp.copy(sec = Literal(Decimal(BigDecimal(59.999999), 8, 6)))
+      checkEvaluation(Milliseconds(timestamp), Decimal(BigDecimal(59999.999), 8, 3))
+      checkEvaluation(Microseconds(timestamp), 59999999)
 
-      checkEvaluation(Milliseconds(timestamp.copy(sec = Literal(60.0))), Decimal(0, 8, 3))
-      checkEvaluation(Microseconds(timestamp.copy(sec = Literal(60.0))), 0)
+      timestamp = timestamp.copy(sec = Literal(Decimal(BigDecimal(60.0), 8, 6)))
+      checkEvaluation(Milliseconds(timestamp), Decimal(0, 8, 3))
+      checkEvaluation(Microseconds(timestamp), 0)
     }
   }
 
@@ -1036,7 +1039,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     val nanos = 123456000
     val timestamp = Epoch(MakeTimestamp(
       Literal(2019), Literal(8), Literal(9), Literal(0), Literal(0),
-      Literal(nanos / DateTimeUtils.NANOS_PER_SECOND.toDouble), Some(Literal(zoneId.getId))))
+      Literal(Decimal(nanos / DateTimeUtils.NANOS_PER_SECOND.toDouble, 8, 6)),
+      Some(Literal(zoneId.getId))))
     val instant = LocalDateTime.of(2019, 8, 9, 0, 0, 0, nanos)
       .atZone(zoneId).toInstant
     val expected = Decimal(BigDecimal(nanos) / DateTimeUtils.NANOS_PER_SECOND +
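The suite fix above tracks an API change in MakeTimestamp: its seconds argument is now a DECIMAL(8, 6) literal instead of a double, so an input such as 10.123456789 is rounded to six fractional digits before any field is extracted. That is why the expectations move from 10123.456 and 10123456 to 10123.457 and 10123457. The rounding step in isolation (plain BigDecimal standing in for Spark's Decimal; HALF_UP at scale 6 is the assumption that reproduces the new expected values):

    import java.math.{BigDecimal => JBigDecimal, RoundingMode}

    object SecondsRoundingSketch {
      // Seconds-with-fraction narrowed to DECIMAL(8, 6).
      def toDecimalSeconds(sec: String): JBigDecimal =
        new JBigDecimal(sec).setScale(6, RoundingMode.HALF_UP)

      // The corresponding whole-microsecond value seen by Microseconds.
      def toMicros(sec: String): Int =
        toDecimalSeconds(sec).movePointRight(6).intValueExact()
    }

Here toMicros("10.123456789") gives 10123457 and toMicros("59.999999") gives 59999999, in line with the updated checkEvaluation calls.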