From d705adccb7231dd80e450421f6b63e6e9177eae5 Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Mon, 11 Nov 2019 23:06:40 +0530 Subject: [PATCH 1/9] Initial commit --- .../catalyst/analysis/PostgreSQLDialect.scala | 21 +++++- .../spark/sql/catalyst/expressions/Cast.scala | 4 +- .../postgreSQL/PostgreCastToTimestamp.scala | 67 +++++++++++++++++++ .../sql/PostgreSQLDialectQuerySuite.scala | 10 +++ 4 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala index e7f0e571804d..e8fb24439355 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala @@ -19,15 +19,15 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.Cast -import org.apache.spark.sql.catalyst.expressions.postgreSQL.PostgreCastToBoolean +import org.apache.spark.sql.catalyst.expressions.postgreSQL.{PostgreCastToBoolean, PostgreCastToTimestamp} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{BooleanType, StringType} +import org.apache.spark.sql.types.{BooleanType, TimestampType} object PostgreSQLDialect { val postgreSQLDialectRules: List[Rule[LogicalPlan]] = - CastToBoolean :: + CastToBoolean :: CastToTimestamp :: Nil object CastToBoolean extends Rule[LogicalPlan] with Logging { @@ -46,4 +46,19 @@ object PostgreSQLDialect { } } } + + object CastToTimestamp extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + val conf = SQLConf.get + if (conf.usePostgreSQLDialect) { + plan.transformExpressions { + case Cast(child, dataType, timeZoneId) + if dataType == TimestampType => + PostgreCastToTimestamp(child, timeZoneId) + } + } else { + plan + } + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index f3b58fa3137b..89d2807f9f1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -410,7 +410,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } // TimestampConverter - private[this] def castToTimestamp(from: DataType): Any => Any = from match { + protected[this] def castToTimestamp(from: DataType): Any => Any = from match { case StringType => buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull) case BooleanType => @@ -1159,7 +1159,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } } - private[this] def castToTimestampCode( + protected[this] def castToTimestampCode( from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala new file mode 100644 index 000000000000..c11abd08c869 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.expressions.postgreSQL + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.{CastBase, Expression, TimeZoneAwareExpression} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, JavaCode} +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils.epochDaysToMicros +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +case class PostgreCastToTimestamp(child: Expression, timeZoneId: Option[String]) + extends CastBase{ + override def dataType: DataType = TimestampType + + override protected def ansiEnabled: Boolean = SQLConf.get.ansiEnabled + + override def nullable: Boolean = true + + /** Returns a copy of this expression with the specified timeZoneId. 
*/ + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + override def castToTimestamp(from: DataType): Any => Any = from match { + case StringType => + buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull) + case DateType => + buildCast[Int](_, d => epochDaysToMicros(d, zoneId)) + case _ => + throw new AnalysisException( + s"Cannot cast type $from to Timestamp.") + } + + private[this] def castToTimestampCode( + from: DataType, + ctx: CodegenContext): CastFunction = from match { + case StringType => + super.castToTimestampCode(from, ctx) + case DateType => + super.castToTimestampCode(from, ctx) + case _ => + (c, evPrim, evNull) => + val fromType = JavaCode.javaType(from) + code"""throw new AnalysisException("Cannot cast type $fromType to Timestamp.");""" + } + + override def toString: String = s"PostgreCastToTimestamp($child as ${dataType.simpleString})" + + override def sql: String = s"CAST(${child.sql} AS ${dataType.sql})" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala index 7056f483609a..e64668f4f330 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala @@ -39,4 +39,14 @@ class PostgreSQLDialectQuerySuite extends QueryTest with SharedSparkSession { intercept[IllegalArgumentException](sql(s"select cast('$input' as boolean)").collect()) } } + + test("cast to timestamp") { + Seq(1, 0.1, 1.toDouble, 5.toFloat, true, 3.toByte, 4.toShort) foreach { value => + val actualResult = intercept[AnalysisException]( + sql(s"SELECT CAST(${value} AS timestamp)") + ).getMessage + val expectedResult = s"Cannot cast type ${value.getClass} to Timestamp." 
+ assert(actualResult == expectedResult) + } + } } From 8b56ae9a7334ca94821953076740696699093867 Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Thu, 14 Nov 2019 10:30:37 +0530 Subject: [PATCH 2/9] Added test cases --- .../catalyst/expressions/postgreSQL/CastSuite.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala index 6c5218b379f3..6e8e948b7b14 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala @@ -70,4 +70,14 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { assert(PostgreCastToBoolean(Literal(1.toDouble), None).checkInputDataTypes().isFailure) assert(PostgreCastToBoolean(Literal(1.toFloat), None).checkInputDataTypes().isFailure) } + + test("unsupported data types to cast to tiestamp") { + assert(PostgreCastToTimestamp(Literal(1), None).checkInputDataTypes().isFailure) + assert(PostgreCastToTimestamp(Literal(1.toByte), None).checkInputDataTypes().isFailure) + assert(PostgreCastToTimestamp(Literal(1.toDouble), None).checkInputDataTypes().isFailure) + assert(PostgreCastToTimestamp(Literal(1.toFloat), None).checkInputDataTypes().isFailure) + assert(PostgreCastToTimestamp(Literal(1.toLong), None).checkInputDataTypes().isFailure) + assert(PostgreCastToTimestamp(Literal(1.toShort), None).checkInputDataTypes().isFailure) + assert(PostgreCastToTimestamp(Literal(BigDecimal(1.0)), None).checkInputDataTypes().isFailure) + } } From 00ffde3deb2f26dea02d66668bbfc56e767e2c87 Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Thu, 14 Nov 2019 11:48:52 +0530 Subject: [PATCH 3/9] Abstract class PostgreCastBase --- .../postgreSQL/PostgreCastBase.scala | 26 +++++++++++++++++++ .../postgreSQL/PostgreCastToTimestamp.scala | 12 +++------ 2 files changed, 30 insertions(+), 8 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala new file mode 100644 index 000000000000..9331d5822c91 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.catalyst.expressions.postgreSQL + +import org.apache.spark.sql.catalyst.expressions.CastBase + +abstract class PostgreCastBase extends CastBase{ + override protected def ansiEnabled: Boolean = + throw new UnsupportedOperationException("PostgreSQL dialect doesn't support ansi mode") + + override def nullable: Boolean = child.nullable +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala index c11abd08c869..34c5c5a49eae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala @@ -27,13 +27,9 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String case class PostgreCastToTimestamp(child: Expression, timeZoneId: Option[String]) - extends CastBase{ + extends PostgreCastBase { override def dataType: DataType = TimestampType - override protected def ansiEnabled: Boolean = SQLConf.get.ansiEnabled - - override def nullable: Boolean = true - /** Returns a copy of this expression with the specified timeZoneId. */ override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) @@ -48,9 +44,9 @@ case class PostgreCastToTimestamp(child: Expression, timeZoneId: Option[String]) s"Cannot cast type $from to Timestamp.") } - private[this] def castToTimestampCode( - from: DataType, - ctx: CodegenContext): CastFunction = from match { + override def castToTimestampCode( + from: DataType, + ctx: CodegenContext): CastFunction = from match { case StringType => super.castToTimestampCode(from, ctx) case DateType => From 6a930608550d910e5afda1907008585be539dfa2 Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Thu, 14 Nov 2019 17:49:14 +0530 Subject: [PATCH 4/9] Added PostgreCastBase --- .../postgreSQL/PostgreCastBase.scala | 117 +++++++++++++++++- .../postgreSQL/PostgreCastToBoolean.scala | 83 ------------- .../postgreSQL/PostgreCastToTimestamp.scala | 63 ---------- 3 files changed, 116 insertions(+), 147 deletions(-) delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToBoolean.scala delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala index 9331d5822c91..8ec17ba18b01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala @@ -16,11 +16,126 @@ */ package org.apache.spark.sql.catalyst.expressions.postgreSQL -import org.apache.spark.sql.catalyst.expressions.CastBase +import java.time.ZoneId + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.expressions.{CastBase, Expression, TimeZoneAwareExpression} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, JavaCode} +import 
org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.postgreSQL.StringUtils +import org.apache.spark.sql.types.{BooleanType, DataType, DateType, IntegerType, NullType, StringType, TimestampType} +import org.apache.spark.unsafe.types.UTF8String abstract class PostgreCastBase extends CastBase{ + override protected def ansiEnabled: Boolean = throw new UnsupportedOperationException("PostgreSQL dialect doesn't support ansi mode") override def nullable: Boolean = child.nullable } + +case class PostgreCastToBoolean(child: Expression, timeZoneId: Option[String]) + extends PostgreCastBase { + + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + override def checkInputDataTypes(): TypeCheckResult = child.dataType match { + case StringType | IntegerType | NullType => + TypeCheckResult.TypeCheckSuccess + case _ => + TypeCheckResult.TypeCheckFailure(s"cannot cast type ${child.dataType} to boolean") + } + + override def castToBoolean(from: DataType): Any => Any = from match { + case StringType => + buildCast[UTF8String](_, str => { + val s = str.trim().toLowerCase() + if (StringUtils.isTrueString(s)) { + true + } else if (StringUtils.isFalseString(s)) { + false + } else { + throw new IllegalArgumentException(s"invalid input syntax for type boolean: $s") + } + }) + case IntegerType => + super.castToBoolean(from) + } + + override def castToBooleanCode(from: DataType): CastFunction = from match { + case StringType => + val stringUtils = inline"${StringUtils.getClass.getName.stripSuffix("$")}" + (c, evPrim, evNull) => + code""" + if ($stringUtils.isTrueString($c.trim().toLowerCase())) { + $evPrim = true; + } else if ($stringUtils.isFalseString($c.trim().toLowerCase())) { + $evPrim = false; + } else { + throw new IllegalArgumentException("invalid input syntax for type boolean: $c"); + } + """ + + case IntegerType => + super.castToBooleanCode(from) + } + + override def dataType: DataType = BooleanType + + override def toString: String = s"PostgreCastToBoolean($child as ${dataType.simpleString})" + + override def sql: String = s"CAST(${child.sql} AS ${dataType.sql})" +} + +case class PostgreCastToTimestamp(child: Expression, timeZoneId: Option[String]) + extends PostgreCastBase { + override def dataType: DataType = TimestampType + + override def checkInputDataTypes(): TypeCheckResult = child.dataType match { + case StringType | DateType => + TypeCheckResult.TypeCheckSuccess + case _ => + TypeCheckResult.TypeCheckFailure(s"cannot cast type ${child.dataType} to timestamp") + } + /** Returns a copy of this expression with the specified timeZoneId. 
*/ + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + override def castToTimestamp(from: DataType): Any => Any = from match { + case StringType => + buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId) + .getOrElse(throw new AnalysisException(s"invalid input syntax for type timestamp:$utfs"))) + case DateType => + super.castToTimestamp(from) + } + + override def castToTimestampCode( + from: DataType, + ctx: CodegenContext): CastFunction = from match { + case StringType => + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]]) + (c, evPrim, evNull) => + code""" + scala.Option $longOpt = + org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid); + if ($longOpt.isDefined()) { + $evPrim = ((Long) $longOpt.get()).longValue(); + } else { + $evNull = throw new AnalysisException(s"invalid input syntax for type timestamp:$c"); + } + """ + case DateType => + super.castToTimestampCode(from, ctx) + } + + override def toString: String = s"PostgreCastToTimestamp($child as ${dataType.simpleString})" + + override def sql: String = s"CAST(${child.sql} AS ${dataType.sql})" +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToBoolean.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToBoolean.scala deleted file mode 100644 index 20559ba3cd79..000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToBoolean.scala +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.sql.catalyst.expressions.postgreSQL - -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.catalyst.expressions.{CastBase, Expression, TimeZoneAwareExpression} -import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.util.postgreSQL.StringUtils -import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String - -case class PostgreCastToBoolean(child: Expression, timeZoneId: Option[String]) - extends CastBase { - - override protected def ansiEnabled = - throw new UnsupportedOperationException("PostgreSQL dialect doesn't support ansi mode") - - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = - copy(timeZoneId = Option(timeZoneId)) - - override def checkInputDataTypes(): TypeCheckResult = child.dataType match { - case StringType | IntegerType | NullType => - TypeCheckResult.TypeCheckSuccess - case _ => - TypeCheckResult.TypeCheckFailure(s"cannot cast type ${child.dataType} to boolean") - } - - override def castToBoolean(from: DataType): Any => Any = from match { - case StringType => - buildCast[UTF8String](_, str => { - val s = str.trim().toLowerCase() - if (StringUtils.isTrueString(s)) { - true - } else if (StringUtils.isFalseString(s)) { - false - } else { - throw new IllegalArgumentException(s"invalid input syntax for type boolean: $s") - } - }) - case IntegerType => - super.castToBoolean(from) - } - - override def castToBooleanCode(from: DataType): CastFunction = from match { - case StringType => - val stringUtils = inline"${StringUtils.getClass.getName.stripSuffix("$")}" - (c, evPrim, evNull) => - code""" - if ($stringUtils.isTrueString($c.trim().toLowerCase())) { - $evPrim = true; - } else if ($stringUtils.isFalseString($c.trim().toLowerCase())) { - $evPrim = false; - } else { - throw new IllegalArgumentException("invalid input syntax for type boolean: $c"); - } - """ - - case IntegerType => - super.castToBooleanCode(from) - } - - override def dataType: DataType = BooleanType - - override def nullable: Boolean = child.nullable - - override def toString: String = s"PostgreCastToBoolean($child as ${dataType.simpleString})" - - override def sql: String = s"CAST(${child.sql} AS ${dataType.sql})" -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala deleted file mode 100644 index 34c5c5a49eae..000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastToTimestamp.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.sql.catalyst.expressions.postgreSQL - -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{CastBase, Expression, TimeZoneAwareExpression} -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, JavaCode} -import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.catalyst.util.DateTimeUtils.epochDaysToMicros -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String - -case class PostgreCastToTimestamp(child: Expression, timeZoneId: Option[String]) - extends PostgreCastBase { - override def dataType: DataType = TimestampType - - /** Returns a copy of this expression with the specified timeZoneId. */ - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = - copy(timeZoneId = Option(timeZoneId)) - - override def castToTimestamp(from: DataType): Any => Any = from match { - case StringType => - buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull) - case DateType => - buildCast[Int](_, d => epochDaysToMicros(d, zoneId)) - case _ => - throw new AnalysisException( - s"Cannot cast type $from to Timestamp.") - } - - override def castToTimestampCode( - from: DataType, - ctx: CodegenContext): CastFunction = from match { - case StringType => - super.castToTimestampCode(from, ctx) - case DateType => - super.castToTimestampCode(from, ctx) - case _ => - (c, evPrim, evNull) => - val fromType = JavaCode.javaType(from) - code"""throw new AnalysisException("Cannot cast type $fromType to Timestamp.");""" - } - - override def toString: String = s"PostgreCastToTimestamp($child as ${dataType.simpleString})" - - override def sql: String = s"CAST(${child.sql} AS ${dataType.sql})" -} From 1024d370c97bd24d8b13eec7ad8332738a11cfb9 Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Fri, 15 Nov 2019 20:54:26 +0530 Subject: [PATCH 5/9] Updated PostgreCastBase --- .../catalyst/analysis/PostgreSQLDialect.scala | 2 +- .../postgreSQL/PostgreCastBase.scala | 62 +++++++++---------- .../sql/PostgreSQLDialectQuerySuite.scala | 11 ++-- 3 files changed, 36 insertions(+), 39 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala index e8fb24439355..3f620ebaba2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.Cast -import org.apache.spark.sql.catalyst.expressions.postgreSQL.{PostgreCastToBoolean, PostgreCastToTimestamp} +import org.apache.spark.sql.catalyst.expressions.postgreSQL._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala index 8ec17ba18b01..e5af5e7a54bb 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala @@ -25,30 +25,43 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, JavaCo import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.postgreSQL.StringUtils -import org.apache.spark.sql.types.{BooleanType, DataType, DateType, IntegerType, NullType, StringType, TimestampType} +import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String -abstract class PostgreCastBase extends CastBase{ +abstract class PostgreCastBase(toType: DataType) extends CastBase { + + def fromTypes: TypeCollection + + override def dataType: DataType = toType override protected def ansiEnabled: Boolean = throw new UnsupportedOperationException("PostgreSQL dialect doesn't support ansi mode") + override def checkInputDataTypes(): TypeCheckResult = { + if (!fromTypes.acceptsType(child.dataType)) { + TypeCheckResult.TypeCheckFailure( + s"cannot cast type ${child.dataType.simpleString} to ${toType.simpleString}") + } else { + TypeCheckResult.TypeCheckSuccess + } + } + override def nullable: Boolean = child.nullable + + override def sql: String = s"CAST(${child.sql} AS ${toType.sql})" + + override def toString: String = + s"PostgreCastTo${toType.simpleString}($child as ${toType.simpleString})" } case class PostgreCastToBoolean(child: Expression, timeZoneId: Option[String]) - extends PostgreCastBase { + extends PostgreCastBase(BooleanType) { + + override def fromTypes: TypeCollection = TypeCollection(StringType, IntegerType, NullType) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) - override def checkInputDataTypes(): TypeCheckResult = child.dataType match { - case StringType | IntegerType | NullType => - TypeCheckResult.TypeCheckSuccess - case _ => - TypeCheckResult.TypeCheckFailure(s"cannot cast type ${child.dataType} to boolean") - } - override def castToBoolean(from: DataType): Any => Any = from match { case StringType => buildCast[UTF8String](_, str => { @@ -68,7 +81,7 @@ case class PostgreCastToBoolean(child: Expression, timeZoneId: Option[String]) override def castToBooleanCode(from: DataType): CastFunction = from match { case StringType => val stringUtils = inline"${StringUtils.getClass.getName.stripSuffix("$")}" - (c, evPrim, evNull) => + (c, evPrim, _) => code""" if ($stringUtils.isTrueString($c.trim().toLowerCase())) { $evPrim = true; @@ -78,29 +91,16 @@ case class PostgreCastToBoolean(child: Expression, timeZoneId: Option[String]) throw new IllegalArgumentException("invalid input syntax for type boolean: $c"); } """ - case IntegerType => super.castToBooleanCode(from) } - - override def dataType: DataType = BooleanType - - override def toString: String = s"PostgreCastToBoolean($child as ${dataType.simpleString})" - - override def sql: String = s"CAST(${child.sql} AS ${dataType.sql})" } case class PostgreCastToTimestamp(child: Expression, timeZoneId: Option[String]) - extends PostgreCastBase { - override def dataType: DataType = TimestampType + extends PostgreCastBase(TimestampType) { + + override def fromTypes: TypeCollection = TypeCollection(StringType, DateType, NullType) - override def checkInputDataTypes(): TypeCheckResult = child.dataType match { - case StringType | DateType => - 
TypeCheckResult.TypeCheckSuccess - case _ => - TypeCheckResult.TypeCheckFailure(s"cannot cast type ${child.dataType} to timestamp") - } - /** Returns a copy of this expression with the specified timeZoneId. */ override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) @@ -121,21 +121,17 @@ case class PostgreCastToTimestamp(child: Expression, timeZoneId: Option[String]) ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), zoneIdClass) val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]]) - (c, evPrim, evNull) => + (c, evPrim, _) => code""" scala.Option $longOpt = org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid); if ($longOpt.isDefined()) { $evPrim = ((Long) $longOpt.get()).longValue(); } else { - $evNull = throw new AnalysisException(s"invalid input syntax for type timestamp:$c"); + throw new AnalysisException(s"invalid input syntax for type timestamp:$c"); } """ case DateType => super.castToTimestampCode(from, ctx) } - - override def toString: String = s"PostgreCastToTimestamp($child as ${dataType.simpleString})" - - override def sql: String = s"CAST(${child.sql} AS ${dataType.sql})" } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala index e64668f4f330..688fa6d7a741 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala @@ -42,11 +42,12 @@ class PostgreSQLDialectQuerySuite extends QueryTest with SharedSparkSession { test("cast to timestamp") { Seq(1, 0.1, 1.toDouble, 5.toFloat, true, 3.toByte, 4.toShort) foreach { value => - val actualResult = intercept[AnalysisException]( - sql(s"SELECT CAST(${value} AS timestamp)") - ).getMessage - val expectedResult = s"Cannot cast type ${value.getClass} to Timestamp." 
- assert(actualResult == expectedResult) + val actualResult = intercept[AnalysisException]( + sql(s"SELECT CAST(${value} AS timestamp)") + ).getMessage + val expectedResult = s"cannot cast type ${value.getClass} to timestamp" + assert(actualResult.contains(expectedResult)) } } } + From 700143d4130a82bc8c78c70da8929ebb62d870bc Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Thu, 21 Nov 2019 11:46:23 +0530 Subject: [PATCH 6/9] Moved test cases to postgreSQL/cast.sql --- .../catalyst/analysis/PostgreSQLDialect.scala | 13 ++--- .../postgreSQL/PostgreCastBase.scala | 5 +- .../expressions/postgreSQL/CastSuite.scala | 2 +- .../sql-tests/inputs/postgreSQL/cast.sql | 7 +++ .../sql-tests/results/postgreSQL/cast.sql.out | 57 +++++++++++++++++++ .../sql/PostgreSQLDialectQuerySuite.scala | 6 +- 6 files changed, 75 insertions(+), 15 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/postgreSQL/cast.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/postgreSQL/cast.sql.out diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala index 3f620ebaba2a..4608f358c4b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala @@ -26,16 +26,16 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, TimestampType} object PostgreSQLDialect { - val postgreSQLDialectRules: List[Rule[LogicalPlan]] = - CastToBoolean :: CastToTimestamp :: - Nil + val postgreSQLDialectRules: Seq[Rule[LogicalPlan]] = Seq( + CastToBoolean, + CastToTimestamp + ) object CastToBoolean extends Rule[LogicalPlan] with Logging { override def apply(plan: LogicalPlan): LogicalPlan = { // The SQL configuration `spark.sql.dialect` can be changed in runtime. // To make sure the configuration is effective, we have to check it during rule execution. 
-      val conf = SQLConf.get
-      if (conf.usePostgreSQLDialect) {
+      if (SQLConf.get.usePostgreSQLDialect) {
         plan.transformExpressions {
           case Cast(child, dataType, timeZoneId)
             if child.dataType != BooleanType && dataType == BooleanType =>
@@ -49,8 +49,7 @@ object PostgreSQLDialect {

   object CastToTimestamp extends Rule[LogicalPlan] {
     override def apply(plan: LogicalPlan): LogicalPlan = {
-      val conf = SQLConf.get
-      if (conf.usePostgreSQLDialect) {
+      if (SQLConf.get.usePostgreSQLDialect) {
         plan.transformExpressions {
           case Cast(child, dataType, timeZoneId)
             if dataType == TimestampType =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala
index e5af5e7a54bb..94d395d75180 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/PostgreCastBase.scala
@@ -107,7 +107,8 @@ case class PostgreCastToTimestamp(child: Expression, timeZoneId: Option[String])
   override def castToTimestamp(from: DataType): Any => Any = from match {
     case StringType =>
       buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId)
-        .getOrElse(throw new AnalysisException(s"invalid input syntax for type timestamp:$utfs")))
+        .getOrElse(throw new IllegalArgumentException(
+          s"invalid input syntax for type timestamp:$utfs")))
     case DateType =>
       super.castToTimestamp(from)
   }
@@ -128,7 +129,7 @@ case class PostgreCastToTimestamp(child: Expression, timeZoneId: Option[String])
           if ($longOpt.isDefined()) {
             $evPrim = ((Long) $longOpt.get()).longValue();
           } else {
-            throw new AnalysisException(s"invalid input syntax for type timestamp:$c");
+            throw new IllegalArgumentException("invalid input syntax for type timestamp: " + $c);
           }
         """
       case DateType =>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala
index 6e8e948b7b14..675ebee2fe57 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala
@@ -71,7 +71,7 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
     assert(PostgreCastToBoolean(Literal(1.toFloat), None).checkInputDataTypes().isFailure)
   }

-  test("unsupported data types to cast to tiestamp") {
+  test("unsupported data types to cast to timestamp") {
     assert(PostgreCastToTimestamp(Literal(1), None).checkInputDataTypes().isFailure)
     assert(PostgreCastToTimestamp(Literal(1.toByte), None).checkInputDataTypes().isFailure)
     assert(PostgreCastToTimestamp(Literal(1.toDouble), None).checkInputDataTypes().isFailure)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/cast.sql
new file mode 100644
index 000000000000..0de8d3833c86
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/cast.sql
@@ -0,0 +1,7 @@
+SELECT CAST(1 AS timestamp);
+SELECT CAST(1.1 AS timestamp);
+SELECT CAST(CAST(1 AS float) AS timestamp);
+SELECT CAST(CAST(1 AS boolean) AS timestamp);
+SELECT CAST(CAST(1 AS byte) AS timestamp);
+SELECT CAST(CAST(1 AS short) AS timestamp);
+SELECT CAST(CAST(1 AS double) AS timestamp);
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/cast.sql.out
new file mode 100644
index 000000000000..cb52b4f6a76a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/cast.sql.out
@@ -0,0 +1,57 @@
+-- Number of queries: 7
+
+
+-- !query 0
+SELECT CAST(1 AS timestamp)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+org.apache.spark.sql.AnalysisException
+
+
+-- !query 1
+SELECT CAST(1.1 AS timestamp)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+org.apache.spark.sql.AnalysisException
+
+
+-- !query 2
+SELECT CAST(CAST(1 AS float) AS timestamp)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+org.apache.spark.sql.AnalysisException
+
+
+-- !query 3
+SELECT CAST(CAST(1 AS boolean) AS timestamp)
+-- !query 3 schema
+struct<>
+-- !query 3 output
+org.apache.spark.sql.AnalysisException
+
+
+-- !query 4
+SELECT CAST(CAST(1 AS byte) AS timestamp)
+-- !query 4 schema
+struct<>
+-- !query 4 output
+org.apache.spark.sql.AnalysisException
+
+
+-- !query 5
+SELECT CAST(CAST(1 AS short) AS timestamp)
+-- !query 5 schema
+struct<>
+-- !query 5 output
+org.apache.spark.sql.AnalysisException
+
+
+-- !query 6
+SELECT CAST(CAST(1 AS double) AS timestamp)
+-- !query 6 schema
+struct<>
+-- !query 6 output
+org.apache.spark.sql.AnalysisException
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala
index 688fa6d7a741..2f709f776fa1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/PostgreSQLDialectQuerySuite.scala
@@ -42,11 +42,7 @@ class PostgreSQLDialectQuerySuite extends QueryTest with SharedSparkSession {

   test("cast to timestamp") {
     Seq(1, 0.1, 1.toDouble, 5.toFloat, true, 3.toByte, 4.toShort) foreach { value =>
-      val actualResult = intercept[AnalysisException](
-        sql(s"SELECT CAST(${value} AS timestamp)")
-      ).getMessage
-      val expectedResult = s"cannot cast type ${value.getClass} to timestamp"
-      assert(actualResult.contains(expectedResult))
+      intercept[IllegalArgumentException](sql(s"select cast('$value' as timestamp)").collect())
     }
   }
 }
+

From 111d673348384eb1ee4d5ee05545cea7dba76cb9 Mon Sep 17 00:00:00 2001
From: Aman Omer
Date: Thu, 21 Nov 2019 16:07:00 +0530
Subject: [PATCH 7/9] Test cases updated

---
 .../sql-tests/inputs/postgreSQL/cast.sql      |  3 ++
 .../sql-tests/results/postgreSQL/cast.sql.out | 33 ++++++++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/cast.sql
index 0de8d3833c86..918034f1e3ce 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/cast.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/cast.sql
@@ -5,3 +5,6 @@ SELECT CAST(CAST(1 AS boolean) AS timestamp);
 SELECT CAST(CAST(1 AS byte) AS timestamp);
 SELECT CAST(CAST(1 AS short) AS timestamp);
 SELECT CAST(CAST(1 AS double) AS timestamp);
+SELECT CAST(CAST('2019' AS date) AS timestamp);
+SELECT CAST(NULL AS timestamp);
+SELECT CAST('2019' AS timestamp);
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/cast.sql.out
index cb52b4f6a76a..a897b610ddcc 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/cast.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/cast.sql.out
@@ -1,4 +1,4 @@
--- Number of queries: 7
+-- Number of queries: 10


 -- !query 0
@@ -7,6 +7,7 @@ SELECT CAST(1 AS timestamp)
 struct<>
 -- !query 0 output
 org.apache.spark.sql.AnalysisException
+cannot resolve 'CAST(1 AS TIMESTAMP)' due to data type mismatch: cannot cast type int to timestamp


 -- !query 1
@@ -15,6 +16,7 @@ SELECT CAST(1.1 AS timestamp)
 struct<>
 -- !query 1 output
 org.apache.spark.sql.AnalysisException
+cannot resolve 'CAST(1.1BD AS TIMESTAMP)' due to data type mismatch: cannot cast type decimal(2,1) to timestamp


 -- !query 2
@@ -23,6 +25,7 @@ SELECT CAST(CAST(1 AS float) AS timestamp)
 struct<>
 -- !query 2 output
 org.apache.spark.sql.AnalysisException
+cannot resolve 'CAST(CAST(1 AS FLOAT) AS TIMESTAMP)' due to data type mismatch: cannot cast type float to timestamp


 -- !query 3
@@ -31,6 +34,7 @@ SELECT CAST(CAST(1 AS boolean) AS timestamp)
 struct<>
 -- !query 3 output
 org.apache.spark.sql.AnalysisException
+cannot resolve 'CAST(CAST(1 AS BOOLEAN) AS TIMESTAMP)' due to data type mismatch: cannot cast type boolean to timestamp


 -- !query 4
@@ -39,6 +43,7 @@ SELECT CAST(CAST(1 AS byte) AS timestamp)
 struct<>
 -- !query 4 output
 org.apache.spark.sql.AnalysisException
+cannot resolve 'CAST(CAST(1 AS TINYINT) AS TIMESTAMP)' due to data type mismatch: cannot cast type tinyint to timestamp


 -- !query 5
@@ -47,6 +52,7 @@ SELECT CAST(CAST(1 AS short) AS timestamp)
 struct<>
 -- !query 5 output
 org.apache.spark.sql.AnalysisException
+cannot resolve 'CAST(CAST(1 AS SMALLINT) AS TIMESTAMP)' due to data type mismatch: cannot cast type smallint to timestamp


 -- !query 6
@@ -55,3 +61,28 @@ SELECT CAST(CAST(1 AS double) AS timestamp)
 struct<>
 -- !query 6 output
 org.apache.spark.sql.AnalysisException
+cannot resolve 'CAST(CAST(1 AS DOUBLE) AS TIMESTAMP)' due to data type mismatch: cannot cast type double to timestamp
+
+
+-- !query 7
+SELECT CAST(CAST('2019' AS date) AS timestamp)
+-- !query 7 schema
+struct<CAST(CAST(2019 AS DATE) AS TIMESTAMP):timestamp>
+-- !query 7 output
+2019-01-01 00:00:00.0
+
+
+-- !query 8
+SELECT CAST(NULL AS timestamp)
+-- !query 8 schema
+struct<CAST(NULL AS TIMESTAMP):timestamp>
+-- !query 8 output
+
+
+
+-- !query 9
+SELECT CAST('2019' AS timestamp)
+-- !query 9 schema
+struct<CAST(2019 AS TIMESTAMP):timestamp>
+-- !query 9 output
+2019-01-01 00:00:00.0

From 4fc3d71eaef3c1687f02a71bba2cc4c8d6ca0dbf Mon Sep 17 00:00:00 2001
From: Aman Omer
Date: Tue, 26 Nov 2019 11:05:28 +0530
Subject: [PATCH 8/9] 1 -> 1.toInt ; child.dataType != TimestampType

---
 .../spark/sql/catalyst/analysis/PostgreSQLDialect.scala       | 4 ++--
 .../spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala
index 4608f358c4b3..0381b90dab7d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.postgreSQL._
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{BooleanType, TimestampType}
+import org.apache.spark.sql.types._

 object PostgreSQLDialect {
   val postgreSQLDialectRules: Seq[Rule[LogicalPlan]] = Seq(
@@ -52,7 +52,7 @@ object PostgreSQLDialect {
       if (SQLConf.get.usePostgreSQLDialect) {
         plan.transformExpressions {
           case Cast(child, dataType, timeZoneId)
-            if dataType == TimestampType =>
+            if child.dataType != TimestampType && dataType == TimestampType =>
               PostgreCastToTimestamp(child, timeZoneId)
         }
       } else {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala
index 675ebee2fe57..2de9ad61448a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/postgreSQL/CastSuite.scala
@@ -72,7 +72,7 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
   }

   test("unsupported data types to cast to timestamp") {
-    assert(PostgreCastToTimestamp(Literal(1), None).checkInputDataTypes().isFailure)
+    assert(PostgreCastToTimestamp(Literal(1.toInt), None).checkInputDataTypes().isFailure)
     assert(PostgreCastToTimestamp(Literal(1.toByte), None).checkInputDataTypes().isFailure)
     assert(PostgreCastToTimestamp(Literal(1.toDouble), None).checkInputDataTypes().isFailure)
     assert(PostgreCastToTimestamp(Literal(1.toFloat), None).checkInputDataTypes().isFailure)

From 08c71f1eaf0e6f867b964832ccc360a50cf6fc53 Mon Sep 17 00:00:00 2001
From: Aman Omer
Date: Thu, 28 Nov 2019 00:07:38 +0530
Subject: [PATCH 9/9] Rules CastToBoolean and CastToTimestamp combined into
 PostgresCast

---
 .../catalyst/analysis/PostgreSQLDialect.scala | 23 ++++---------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala
index 0381b90dab7d..8d4acd1608f5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PostgreSQLDialect.scala
@@ -17,7 +17,6 @@

 package org.apache.spark.sql.catalyst.analysis

-import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.expressions.Cast
 import org.apache.spark.sql.catalyst.expressions.postgreSQL._
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
@@ -27,32 +26,18 @@ import org.apache.spark.sql.types._

 object PostgreSQLDialect {
   val postgreSQLDialectRules: Seq[Rule[LogicalPlan]] = Seq(
-    CastToBoolean,
-    CastToTimestamp
+    PostgresCast
   )

-  object CastToBoolean extends Rule[LogicalPlan] with Logging {
+  object PostgresCast extends Rule[LogicalPlan] {
     override def apply(plan: LogicalPlan): LogicalPlan = {
-      // The SQL configuration `spark.sql.dialect` can be changed in runtime.
-      // To make sure the configuration is effective, we have to check it during rule execution.
       if (SQLConf.get.usePostgreSQLDialect) {
         plan.transformExpressions {
           case Cast(child, dataType, timeZoneId)
-            if child.dataType != BooleanType && dataType == BooleanType =>
+            if dataType == BooleanType && child.dataType != BooleanType =>
               PostgreCastToBoolean(child, timeZoneId)
-        }
-      } else {
-        plan
-      }
-    }
-  }
-
-  object CastToTimestamp extends Rule[LogicalPlan] {
-    override def apply(plan: LogicalPlan): LogicalPlan = {
-      if (SQLConf.get.usePostgreSQLDialect) {
-        plan.transformExpressions {
           case Cast(child, dataType, timeZoneId)
-            if child.dataType != TimestampType && dataType == TimestampType =>
+            if dataType == TimestampType && child.dataType != TimestampType =>
               PostgreCastToTimestamp(child, timeZoneId)
         }
       } else {
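
A minimal end-to-end sketch of the behaviour this series aims for. It is illustrative only and not part of the patches: it assumes a Spark 3.0-preview build where setting `spark.sql.dialect` to `PostgreSQL` activates `postgreSQLDialectRules`; the object name and session setup are hypothetical.

import org.apache.spark.sql.SparkSession

object PostgreDialectCastDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("PostgreDialectCastDemo")
      // Assumption: this flag makes the analyzer apply the PostgreSQLDialect rules.
      .config("spark.sql.dialect", "PostgreSQL")
      .getOrCreate()

    // Rejected at analysis time: the PostgresCast rule rewrites Cast into
    // PostgreCastToTimestamp, whose checkInputDataTypes accepts only string,
    // date, and null children, so numeric casts fail to resolve.
    // spark.sql("SELECT CAST(1 AS timestamp)").collect()      // AnalysisException

    // Fails at runtime instead of silently returning NULL, mirroring PostgreSQL:
    // spark.sql("SELECT CAST('abc' AS timestamp)").collect()  // IllegalArgumentException

    // Accepted inputs still cast as before.
    spark.sql("SELECT CAST('2019' AS timestamp)").show()

    spark.stop()
  }
}

Under the default dialect the same numeric casts keep Spark's permissive behaviour; this is why the rules read `SQLConf.get.usePostgreSQLDialect` during rule execution instead of capturing the flag once when the rule list is built.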