diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 8a6bd5ed27360..cb33930d5e2da 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -79,10 +79,6 @@ singleTableSchema : colTypeList EOF ; -singleInterval - : INTERVAL? multiUnitsInterval EOF - ; - statement : query #statementDefault | ctes? dmlStatementNoWith #dmlStatement diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index f3b58fa3137b1..81627e85fcfb5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -467,7 +467,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // IntervalConverter private[this] def castToInterval(from: DataType): Any => Any = from match { case StringType => - buildCast[UTF8String](_, s => IntervalUtils.stringToInterval(s)) + buildCast[UTF8String](_, s => IntervalUtils.safeStringToInterval(s)) } // LongConverter @@ -1216,7 +1216,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case StringType => val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$") (c, evPrim, evNull) => - code"""$evPrim = $util.stringToInterval($c); + code"""$evPrim = $util.safeStringToInterval($c); if(${evPrim} == null) { ${evNull} = true; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala index 69badb9562dc3..caacb71814f17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_DAY import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String case class TimeWindow( timeColumn: Expression, @@ -103,7 +104,7 @@ object TimeWindow { * precision. 
*/ private def getIntervalInMicroSeconds(interval: String): Long = { - val cal = IntervalUtils.fromString(interval) + val cal = IntervalUtils.stringToInterval(UTF8String.fromString(interval)) if (cal.months != 0) { throw new IllegalArgumentException( s"Intervals greater than a month is not supported ($interval).") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 73233f934bedc..4ab1a078500f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -102,10 +102,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging withOrigin(ctx)(StructType(visitColTypeList(ctx.colTypeList))) } - override def visitSingleInterval(ctx: SingleIntervalContext): CalendarInterval = { - withOrigin(ctx)(visitMultiUnitsInterval(ctx.multiUnitsInterval)) - } - /* ******************************************************************************************** * Plan parsing * ******************************************************************************************** */ @@ -1870,7 +1866,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging toLiteral(stringToTimestamp(_, zoneId), TimestampType) case "INTERVAL" => val interval = try { - IntervalUtils.fromString(value) + IntervalUtils.stringToInterval(UTF8String.fromString(value)) } catch { case e: IllegalArgumentException => val ex = new ParseException("Cannot parse the INTERVAL value: " + value, ctx) @@ -2069,22 +2065,20 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = { withOrigin(ctx) { - val units = ctx.intervalUnit().asScala.map { unit => - val u = unit.getText.toLowerCase(Locale.ROOT) - // Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/... 
- if (u.endsWith("s")) u.substring(0, u.length - 1) else u - }.map(IntervalUtils.IntervalUnit.withName).toArray - - val values = ctx.intervalValue().asScala.map { value => - if (value.STRING() != null) { - string(value.STRING()) - } else { - value.getText - } - }.toArray - + val units = ctx.intervalUnit().asScala + val values = ctx.intervalValue().asScala try { - IntervalUtils.fromUnitStrings(units, values) + assert(units.length == values.length) + val kvs = units.indices.map { i => + val u = units(i).getText + val v = if (values(i).STRING() != null) { + string(values(i).STRING()) + } else { + values(i).getText + } + UTF8String.fromString(" " + v + " " + u) + } + IntervalUtils.stringToInterval(UTF8String.concat(kvs: _*)) } catch { case i: IllegalArgumentException => val e = new ParseException(i.getMessage, ctx) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index b66cae7979416..a84d29b71ac42 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -29,21 +29,12 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructType} -import org.apache.spark.unsafe.types.CalendarInterval /** * Base SQL parsing infrastructure. */ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Logging { - /** - * Creates [[CalendarInterval]] for a given SQL String. Throws [[ParseException]] if the SQL - * string is not a valid interval format. - */ - def parseInterval(sqlText: String): CalendarInterval = parse(sqlText) { parser => - astBuilder.visitSingleInterval(parser.singleInterval()) - } - /** Creates/Resolves DataType for a given SQL string. */ override def parseDataType(sqlText: String): DataType = parse(sqlText) { parser => astBuilder.visitSingleDataType(parser.singleDataType()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 991312bff30aa..22e9116dd1de7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -21,7 +21,6 @@ import java.util.concurrent.TimeUnit import scala.util.control.NonFatal -import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.types.Decimal import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -101,34 +100,6 @@ object IntervalUtils { Decimal(result, 18, 6) } - /** - * Converts a string to [[CalendarInterval]] case-insensitively. - * - * @throws IllegalArgumentException if the input string is not in valid interval format. - */ - def fromString(str: String): CalendarInterval = { - if (str == null) throw new IllegalArgumentException("Interval string cannot be null") - try { - CatalystSqlParser.parseInterval(str) - } catch { - case e: ParseException => - val ex = new IllegalArgumentException(s"Invalid interval string: $str\n" + e.message) - ex.setStackTrace(e.getStackTrace) - throw ex - } - } - - /** - * A safe version of `fromString`. 
It returns null for invalid input string. - */ - def safeFromString(str: String): CalendarInterval = { - try { - fromString(str) - } catch { - case _: IllegalArgumentException => null - } - } - private def toLongWithRange( fieldName: IntervalUnit, s: String, @@ -250,46 +221,6 @@ object IntervalUtils { } } - def fromUnitStrings(units: Array[IntervalUnit], values: Array[String]): CalendarInterval = { - assert(units.length == values.length) - var months: Int = 0 - var days: Int = 0 - var microseconds: Long = 0 - var i = 0 - while (i < units.length) { - try { - units(i) match { - case YEAR => - months = Math.addExact(months, Math.multiplyExact(values(i).toInt, 12)) - case MONTH => - months = Math.addExact(months, values(i).toInt) - case WEEK => - days = Math.addExact(days, Math.multiplyExact(values(i).toInt, 7)) - case DAY => - days = Math.addExact(days, values(i).toInt) - case HOUR => - val hoursUs = Math.multiplyExact(values(i).toLong, MICROS_PER_HOUR) - microseconds = Math.addExact(microseconds, hoursUs) - case MINUTE => - val minutesUs = Math.multiplyExact(values(i).toLong, MICROS_PER_MINUTE) - microseconds = Math.addExact(microseconds, minutesUs) - case SECOND => - microseconds = Math.addExact(microseconds, parseSecondNano(values(i))) - case MILLISECOND => - val millisUs = Math.multiplyExact(values(i).toLong, MICROS_PER_MILLIS) - microseconds = Math.addExact(microseconds, millisUs) - case MICROSECOND => - microseconds = Math.addExact(microseconds, values(i).toLong) - } - } catch { - case e: Exception => - throw new IllegalArgumentException(s"Error parsing interval string: ${e.getMessage}", e) - } - i += 1 - } - new CalendarInterval(months, days, microseconds) - } - // Parses a string with nanoseconds, truncates the result and returns microseconds private def parseNanos(nanosStr: String, isNegative: Boolean): Long = { if (nanosStr != null) { @@ -305,30 +236,6 @@ object IntervalUtils { } } - /** - * Parse second_nano string in ss.nnnnnnnnn format to microseconds - */ - private def parseSecondNano(secondNano: String): Long = { - def parseSeconds(secondsStr: String): Long = { - toLongWithRange( - SECOND, - secondsStr, - Long.MinValue / MICROS_PER_SECOND, - Long.MaxValue / MICROS_PER_SECOND) * MICROS_PER_SECOND - } - - secondNano.split("\\.") match { - case Array(secondsStr) => parseSeconds(secondsStr) - case Array("", nanosStr) => parseNanos(nanosStr, false) - case Array(secondsStr, nanosStr) => - val seconds = parseSeconds(secondsStr) - Math.addExact(seconds, parseNanos(nanosStr, seconds < 0)) - case _ => - throw new IllegalArgumentException( - "Interval string does not match second-nano format of ss.nnnnnnnnn") - } - } - /** * Gets interval duration * @@ -452,18 +359,37 @@ object IntervalUtils { private final val millisStr = unitToUtf8(MILLISECOND) private final val microsStr = unitToUtf8(MICROSECOND) + /** + * A safe version of `stringToInterval`. It returns null for invalid input string. + */ + def safeStringToInterval(input: UTF8String): CalendarInterval = { + try { + stringToInterval(input) + } catch { + case _: IllegalArgumentException => null + } + } + + /** + * Converts a string to [[CalendarInterval]] case-insensitively. + * + * @throws IllegalArgumentException if the input string is not in valid interval format. 
+ */ def stringToInterval(input: UTF8String): CalendarInterval = { import ParseState._ + def throwIAE(msg: String, e: Exception = null) = { + throw new IllegalArgumentException(s"Error parsing '$input' to interval, $msg", e) + } if (input == null) { - return null + throwIAE("interval string cannot be null") } // scalastyle:off caselocale .toLowerCase val s = input.trim.toLowerCase // scalastyle:on val bytes = s.getBytes if (bytes.isEmpty) { - return null + throwIAE("interval string cannot be empty") } var state = PREFIX var i = 0 @@ -482,13 +408,19 @@ object IntervalUtils { } } + def currentWord: UTF8String = { + val strings = s.split(UTF8String.blankString(1), -1) + val lenRight = s.substring(i, s.numBytes()).split(UTF8String.blankString(1), -1).length + strings(strings.length - lenRight) + } + while (i < bytes.length) { val b = bytes(i) state match { case PREFIX => if (s.startsWith(intervalStr)) { if (s.numBytes() == intervalStr.numBytes()) { - return null + throwIAE("interval string cannot be empty") } else { i += intervalStr.numBytes() } @@ -521,7 +453,7 @@ object IntervalUtils { fractionScale = (NANOS_PER_SECOND / 10).toInt i += 1 state = VALUE_FRACTIONAL_PART - case _ => return null + case _ => throwIAE( s"unrecognized number '$currentWord'") } case TRIM_BEFORE_VALUE => trimToNextState(b, VALUE) case VALUE => @@ -530,13 +462,13 @@ object IntervalUtils { try { currentValue = Math.addExact(Math.multiplyExact(10, currentValue), (b - '0')) } catch { - case _: ArithmeticException => return null + case e: ArithmeticException => throwIAE(e.getMessage, e) } case ' ' => state = TRIM_BEFORE_UNIT case '.' => fractionScale = (NANOS_PER_SECOND / 10).toInt state = VALUE_FRACTIONAL_PART - case _ => return null + case _ => throwIAE(s"invalid value '$currentWord'") } i += 1 case VALUE_FRACTIONAL_PART => @@ -547,14 +479,17 @@ object IntervalUtils { case ' ' => fraction /= NANOS_PER_MICROS.toInt state = TRIM_BEFORE_UNIT - case _ => return null + case _ if '0' <= b && b <= '9' => + throwIAE(s"interval can only support nanosecond precision, '$currentWord' is out" + + s" of range") + case _ => throwIAE(s"invalid value '$currentWord'") } i += 1 case TRIM_BEFORE_UNIT => trimToNextState(b, UNIT_BEGIN) case UNIT_BEGIN => // Checks that only seconds can have the fractional part if (b != 's' && fractionScale >= 0) { - return null + throwIAE(s"'$currentWord' cannot have fractional part") } if (isNegative) { currentValue = -currentValue @@ -598,18 +533,18 @@ object IntervalUtils { } else if (s.matchAt(microsStr, i)) { microseconds = Math.addExact(microseconds, currentValue) i += microsStr.numBytes() - } else return null - case _ => return null + } else throwIAE(s"invalid unit '$currentWord'") + case _ => throwIAE(s"invalid unit '$currentWord'") } } catch { - case _: ArithmeticException => return null + case e: ArithmeticException => throwIAE(e.getMessage, e) } state = UNIT_SUFFIX case UNIT_SUFFIX => b match { case 's' => state = UNIT_END case ' ' => state = TRIM_BEFORE_SIGN - case _ => return null + case _ => throwIAE(s"invalid unit '$currentWord'") } i += 1 case UNIT_END => @@ -617,7 +552,7 @@ object IntervalUtils { case ' ' => i += 1 state = TRIM_BEFORE_SIGN - case _ => return null + case _ => throwIAE(s"invalid unit '$currentWord'") } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 3287c83b1dd87..cad6bd6d18694 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -32,9 +32,12 @@ import org.apache.spark.sql.catalyst.util.IntervalUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH +import org.apache.spark.unsafe.types.UTF8String class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { + implicit def stringToUTF8Str(str: String): UTF8String = UTF8String.fromString(str) + def testSize(sizeOfNull: Any): Unit = { val a0 = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType)) val a1 = Literal.create(Seq[Integer](), ArrayType(IntegerType)) @@ -721,7 +724,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-01-01 00:00:00")), Literal(Timestamp.valueOf("2018-01-02 00:00:00")), - Literal(fromString("interval 12 hours"))), + Literal(stringToInterval("interval 12 hours"))), Seq( Timestamp.valueOf("2018-01-01 00:00:00"), Timestamp.valueOf("2018-01-01 12:00:00"), @@ -730,7 +733,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-01-01 00:00:00")), Literal(Timestamp.valueOf("2018-01-02 00:00:01")), - Literal(fromString("interval 12 hours"))), + Literal(stringToInterval("interval 12 hours"))), Seq( Timestamp.valueOf("2018-01-01 00:00:00"), Timestamp.valueOf("2018-01-01 12:00:00"), @@ -739,7 +742,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-01-02 00:00:00")), Literal(Timestamp.valueOf("2018-01-01 00:00:00")), - Literal(negate(fromString("interval 12 hours")))), + Literal(negate(stringToInterval("interval 12 hours")))), Seq( Timestamp.valueOf("2018-01-02 00:00:00"), Timestamp.valueOf("2018-01-01 12:00:00"), @@ -748,7 +751,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-01-02 00:00:00")), Literal(Timestamp.valueOf("2017-12-31 23:59:59")), - Literal(negate(fromString("interval 12 hours")))), + Literal(negate(stringToInterval("interval 12 hours")))), Seq( Timestamp.valueOf("2018-01-02 00:00:00"), Timestamp.valueOf("2018-01-01 12:00:00"), @@ -757,7 +760,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-01-01 00:00:00")), Literal(Timestamp.valueOf("2018-03-01 00:00:00")), - Literal(fromString("interval 1 month"))), + Literal(stringToInterval("interval 1 month"))), Seq( Timestamp.valueOf("2018-01-01 00:00:00"), Timestamp.valueOf("2018-02-01 00:00:00"), @@ -766,7 +769,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-03-01 00:00:00")), Literal(Timestamp.valueOf("2018-01-01 00:00:00")), - Literal(negate(fromString("interval 1 month")))), + Literal(negate(stringToInterval("interval 1 month")))), Seq( Timestamp.valueOf("2018-03-01 00:00:00"), Timestamp.valueOf("2018-02-01 00:00:00"), @@ -775,7 +778,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( 
Literal(Timestamp.valueOf("2018-03-03 00:00:00")), Literal(Timestamp.valueOf("2018-01-01 00:00:00")), - Literal(negate(fromString("interval 1 month 1 day")))), + Literal(negate(stringToInterval("interval 1 month 1 day")))), Seq( Timestamp.valueOf("2018-03-03 00:00:00"), Timestamp.valueOf("2018-02-02 00:00:00"), @@ -784,7 +787,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-01-31 00:00:00")), Literal(Timestamp.valueOf("2018-04-30 00:00:00")), - Literal(fromString("interval 1 month"))), + Literal(stringToInterval("interval 1 month"))), Seq( Timestamp.valueOf("2018-01-31 00:00:00"), Timestamp.valueOf("2018-02-28 00:00:00"), @@ -794,7 +797,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-01-01 00:00:00")), Literal(Timestamp.valueOf("2018-03-01 00:00:00")), - Literal(fromString("interval 1 month 1 second"))), + Literal(stringToInterval("interval 1 month 1 second"))), Seq( Timestamp.valueOf("2018-01-01 00:00:00"), Timestamp.valueOf("2018-02-01 00:00:01"))) @@ -802,7 +805,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-01-01 00:00:00")), Literal(Timestamp.valueOf("2018-03-01 00:04:06")), - Literal(fromString("interval 1 month 2 minutes 3 seconds"))), + Literal(stringToInterval("interval 1 month 2 minutes 3 seconds"))), Seq( Timestamp.valueOf("2018-01-01 00:00:00"), Timestamp.valueOf("2018-02-01 00:02:03"), @@ -840,7 +843,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-03-25 01:30:00")), Literal(Timestamp.valueOf("2018-03-25 03:30:00")), - Literal(fromString("interval 30 minutes"))), + Literal(stringToInterval("interval 30 minutes"))), Seq( Timestamp.valueOf("2018-03-25 01:30:00"), Timestamp.valueOf("2018-03-25 03:00:00"), @@ -850,7 +853,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-10-28 01:30:00")), Literal(Timestamp.valueOf("2018-10-28 03:30:00")), - Literal(fromString("interval 30 minutes"))), + Literal(stringToInterval("interval 30 minutes"))), Seq( Timestamp.valueOf("2018-10-28 01:30:00"), noDST(Timestamp.valueOf("2018-10-28 02:00:00")), @@ -867,7 +870,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Date.valueOf("2018-01-01")), Literal(Date.valueOf("2018-01-05")), - Literal(fromString("interval 2 days"))), + Literal(stringToInterval("interval 2 days"))), Seq( Date.valueOf("2018-01-01"), Date.valueOf("2018-01-03"), @@ -876,7 +879,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Date.valueOf("2018-01-01")), Literal(Date.valueOf("2018-03-01")), - Literal(fromString("interval 1 month"))), + Literal(stringToInterval("interval 1 month"))), Seq( Date.valueOf("2018-01-01"), Date.valueOf("2018-02-01"), @@ -885,7 +888,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(new Sequence( Literal(Date.valueOf("2018-01-31")), Literal(Date.valueOf("2018-04-30")), - Literal(fromString("interval 1 month"))), + Literal(stringToInterval("interval 1 month"))), Seq( Date.valueOf("2018-01-31"), Date.valueOf("2018-02-28"), @@ 
-906,14 +909,14 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper new Sequence( Literal(Date.valueOf("1970-01-02")), Literal(Date.valueOf("1970-01-01")), - Literal(fromString("interval 1 day"))), + Literal(stringToInterval("interval 1 day"))), EmptyRow, "sequence boundaries: 1 to 0 by 1") checkExceptionInExpression[IllegalArgumentException]( new Sequence( Literal(Date.valueOf("1970-01-01")), Literal(Date.valueOf("1970-02-01")), - Literal(negate(fromString("interval 1 month")))), + Literal(negate(stringToInterval("interval 1 month")))), EmptyRow, s"sequence boundaries: 0 to 2678400000000 by -${28 * MICROS_PER_DAY}") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 5f043ce972bed..5cd4d11e32f7a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1090,17 +1090,17 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(SubtractTimestamps(Literal(end), Literal(end)), new CalendarInterval(0, 0, 0)) checkEvaluation(SubtractTimestamps(Literal(end), Literal(Instant.EPOCH)), - IntervalUtils.fromString("interval " + - "436163 hours 4 minutes 1 seconds 123 milliseconds 456 microseconds")) + IntervalUtils.stringToInterval(UTF8String.fromString("interval " + + "436163 hours 4 minutes 1 seconds 123 milliseconds 456 microseconds"))) checkEvaluation(SubtractTimestamps(Literal(Instant.EPOCH), Literal(end)), - IntervalUtils.fromString("interval " + - "-436163 hours -4 minutes -1 seconds -123 milliseconds -456 microseconds")) + IntervalUtils.stringToInterval(UTF8String.fromString("interval " + + "-436163 hours -4 minutes -1 seconds -123 milliseconds -456 microseconds"))) checkEvaluation( SubtractTimestamps( Literal(Instant.parse("9999-12-31T23:59:59.999999Z")), Literal(Instant.parse("0001-01-01T00:00:00Z"))), - IntervalUtils.fromString("interval " + - "87649415 hours 59 minutes 59 seconds 999 milliseconds 999 microseconds")) + IntervalUtils.stringToInterval(UTF8String.fromString("interval " + + "87649415 hours 59 minutes 59 seconds 999 milliseconds 999 microseconds"))) } test("subtract dates") { @@ -1108,18 +1108,18 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(SubtractDates(Literal(end), Literal(end)), new CalendarInterval(0, 0, 0)) checkEvaluation(SubtractDates(Literal(end.plusDays(1)), Literal(end)), - IntervalUtils.fromString("interval 1 days")) + IntervalUtils.stringToInterval(UTF8String.fromString("interval 1 days"))) checkEvaluation(SubtractDates(Literal(end.minusDays(1)), Literal(end)), - IntervalUtils.fromString("interval -1 days")) + IntervalUtils.stringToInterval(UTF8String.fromString("interval -1 days"))) val epochDate = Literal(LocalDate.ofEpochDay(0)) checkEvaluation(SubtractDates(Literal(end), epochDate), - IntervalUtils.fromString("interval 49 years 9 months 4 days")) + IntervalUtils.stringToInterval(UTF8String.fromString("interval 49 years 9 months 4 days"))) checkEvaluation(SubtractDates(epochDate, Literal(end)), - IntervalUtils.fromString("interval -49 years -9 months -4 days")) + IntervalUtils.stringToInterval(UTF8String.fromString("interval -49 years -9 months -4 days"))) checkEvaluation( SubtractDates( Literal(LocalDate.of(10000, 1, 1)), 
Literal(LocalDate.of(1, 1, 1))), - IntervalUtils.fromString("interval 9999 years")) + IntervalUtils.stringToInterval(UTF8String.fromString("interval 9999 years"))) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala index 4b2da73abe562..3a68847ecb1f4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala @@ -36,6 +36,7 @@ import org.apache.spark.unsafe.types.UTF8String class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val random = new scala.util.Random + implicit def stringToUTF8Str(str: String): UTF8String = UTF8String.fromString(str) test("md5") { checkEvaluation(Md5(Literal("ABC".getBytes(StandardCharsets.UTF_8))), @@ -252,7 +253,8 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("hive-hash for CalendarInterval type") { def checkHiveHashForIntervalType(interval: String, expected: Long): Unit = { - checkHiveHash(IntervalUtils.fromString(interval), CalendarIntervalType, expected) + checkHiveHash(IntervalUtils.stringToInterval(UTF8String.fromString(interval)), + CalendarIntervalType, expected) } // ----- MICROSEC ----- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala index e483f028ffff3..ddcb6a66832af 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala @@ -21,13 +21,15 @@ import scala.language.implicitConversions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util.DateTimeConstants._ -import org.apache.spark.sql.catalyst.util.IntervalUtils.fromString +import org.apache.spark.sql.catalyst.util.IntervalUtils.stringToInterval import org.apache.spark.sql.types.Decimal -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { + implicit def stringToUTF8Str(str: String): UTF8String = UTF8String.fromString(str) + implicit def interval(s: String): Literal = { - Literal(fromString("interval " + s)) + Literal(stringToInterval( "interval " + s)) } test("millenniums") { @@ -197,8 +199,8 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("multiply") { def check(interval: String, num: Double, expected: String): Unit = { checkEvaluation( - MultiplyInterval(Literal(fromString(interval)), Literal(num)), - if (expected == null) null else fromString(expected)) + MultiplyInterval(Literal(stringToInterval(interval)), Literal(num)), + if (expected == null) null else stringToInterval(expected)) } check("0 seconds", 10, "0 seconds") @@ -215,8 +217,8 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("divide") { def check(interval: String, num: Double, expected: String): Unit = { checkEvaluation( - DivideInterval(Literal(fromString(interval)), Literal(num)), - if (expected == null) null else fromString(expected)) + 
DivideInterval(Literal(stringToInterval(interval)), Literal(num)), + if (expected == null) null else stringToInterval(expected)) } check("0 seconds", 10, "0 seconds") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala index 23ba9c6ec7388..63700a1e94a3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String class MutableProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -56,7 +57,8 @@ class MutableProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { testBothCodegenAndInterpreted("variable-length types") { val proj = createMutableProjection(variableLengthTypes) - val scalaValues = Seq("abc", BigDecimal(10), IntervalUtils.fromString("interval 1 day"), + val scalaValues = Seq("abc", BigDecimal(10), + IntervalUtils.stringToInterval(UTF8String.fromString("interval 1 day")), Array[Byte](1, 2), Array("123", "456"), Map(1 -> "a", 2 -> "b"), Row(1, "a"), new java.lang.Integer(5)) val inputRow = InternalRow.fromSeq(scalaValues.zip(variableLengthTypes).map { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index 4ccd4f7ce798d..ef7764dba1e9e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -485,7 +485,8 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { ("abcd".getBytes, BinaryType), ("abcd", StringType), (BigDecimal.valueOf(10), DecimalType.IntDecimal), - (IntervalUtils.fromString("interval 3 day"), CalendarIntervalType), + (IntervalUtils.stringToInterval(UTF8String.fromString("interval 3 day")), + CalendarIntervalType), (java.math.BigDecimal.valueOf(10), DecimalType.BigIntDecimal), (Array(3, 2, 1), ArrayType(IntegerType)) ).foreach { case (input, dt) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala index 20e77254ecdad..b80b30a4e07ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala @@ -531,7 +531,8 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers with PlanTestB // Simple tests val inputRow = InternalRow.fromSeq(Seq( false, 3.toByte, 15.toShort, -83, 129L, 1.0f, 8.0, UTF8String.fromString("test"), - Decimal(255), IntervalUtils.fromString("interval 1 day"), Array[Byte](1, 2) + Decimal(255), IntervalUtils.stringToInterval(UTF8String.fromString( "interval 1 day")), + Array[Byte](1, 2) )) val fields1 = Array( BooleanType, ByteType, ShortType, IntegerType, LongType, 
FloatType, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index a707b456c6bd1..d010d07400db3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.IntervalUtils.IntervalUnit._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} /** * Test basic expression parsing. @@ -43,6 +43,8 @@ class ExpressionParserSuite extends AnalysisTest { import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ + implicit def stringToUTF8Str(str: String): UTF8String = UTF8String.fromString(str) + val defaultParser = CatalystSqlParser def assertEqual( @@ -434,13 +436,13 @@ class ExpressionParserSuite extends AnalysisTest { intercept("timestamP '2016-33-11 20:54:00.000'", "Cannot parse the TIMESTAMP value") // Interval. - val intervalLiteral = Literal(IntervalUtils.fromString("interval 3 month 1 hour")) + val intervalLiteral = Literal(IntervalUtils.stringToInterval("interval 3 month 1 hour")) assertEqual("InterVal 'interval 3 month 1 hour'", intervalLiteral) assertEqual("INTERVAL '3 month 1 hour'", intervalLiteral) intercept("Interval 'interval 3 monthsss 1 hoursss'", "Cannot parse the INTERVAL value") assertEqual( "-interval '3 month 1 hour'", - Literal(IntervalUtils.fromString("interval -3 month -1 hour"))) + Literal(IntervalUtils.stringToInterval("interval -3 month -1 hour"))) // Binary. 
assertEqual("X'A'", Literal(Array(0x0a).map(_.toByte))) @@ -602,7 +604,7 @@ class ExpressionParserSuite extends AnalysisTest { MICROSECOND) def intervalLiteral(u: IntervalUnit, s: String): Literal = { - Literal(IntervalUtils.fromUnitStrings(Array(u), Array(s))) + Literal(IntervalUtils.stringToInterval(s + " " + u.toString)) } test("intervals") { @@ -651,7 +653,8 @@ class ExpressionParserSuite extends AnalysisTest { 0, 0, 13 * MICROS_PER_SECOND + 123 * MICROS_PER_MILLIS + 456))) - checkIntervals("1.001 second", Literal(IntervalUtils.fromString("1 second 1 millisecond"))) + checkIntervals("1.001 second", + Literal(IntervalUtils.stringToInterval("1 second 1 millisecond"))) // Non Existing unit intercept("interval 10 nanoseconds", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala index f919bd1644871..b8a6c68bd0435 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala @@ -28,20 +28,31 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class IntervalUtilsSuite extends SparkFunSuite { private def checkFromString(input: String, expected: CalendarInterval): Unit = { - assert(fromString(input) === expected) assert(stringToInterval(UTF8String.fromString(input)) === expected) + assert(safeStringToInterval(UTF8String.fromString(input)) === expected) + } + + private def checkFromStringWithFunc( + input: String, + months: Int, + days: Int, + us: Long, + func: CalendarInterval => CalendarInterval): Unit = { + val expected = new CalendarInterval(months, days, us) + assert(func(stringToInterval(UTF8String.fromString(input))) === expected) + assert(func(safeStringToInterval(UTF8String.fromString(input))) === expected) } private def checkFromInvalidString(input: String, errorMsg: String): Unit = { try { - fromString(input) + stringToInterval(UTF8String.fromString(input)) fail("Expected to throw an exception for the invalid input") } catch { case e: IllegalArgumentException => val msg = e.getMessage assert(msg.contains(errorMsg)) } - assert(stringToInterval(UTF8String.fromString(input)) === null) + assert(safeStringToInterval(UTF8String.fromString(input)) === null) } private def testSingleUnit( @@ -69,7 +80,7 @@ class IntervalUtilsSuite extends SparkFunSuite { checkFromInvalidString(null, "cannot be null") for (input <- Seq("", " ", "interval", "interval1 day", "foo", "foo 1 day")) { - checkFromInvalidString(input, "Invalid interval string") + checkFromInvalidString(input, "Error parsing") } } @@ -93,8 +104,18 @@ class IntervalUtilsSuite extends SparkFunSuite { // Allow duplicated units and summarize their values checkFromString("1 day 10 day", new CalendarInterval(0, 11, 0)) // Only the seconds units can have the fractional part - checkFromInvalidString("1.5 days", "Error parsing interval string") - checkFromInvalidString("1. hour", "Error parsing interval string") + checkFromInvalidString("1.5 days", "'days' cannot have fractional part") + checkFromInvalidString("1. 
hour", "'hour' cannot have fractional part") + checkFromInvalidString("1 hourX", "invalid unit 'hourx'") + checkFromInvalidString("~1 hour", "unrecognized number '~1'") + checkFromInvalidString("1 Mour", "invalid unit 'mour'") + checkFromInvalidString("1 aour", "invalid unit 'aour'") + checkFromInvalidString("1a1 hour", "invalid value '1a1'") + checkFromInvalidString("1.1a1 seconds", "invalid value '1.1a1'") + checkFromInvalidString("2234567890 days", "integer overflow") + checkFromInvalidString("\n", "Error parsing '\n' to interval") + checkFromInvalidString("\t", "Error parsing '\t' to interval") + } test("string to interval: seconds with fractional part") { @@ -107,7 +128,7 @@ class IntervalUtilsSuite extends SparkFunSuite { // truncate nanoseconds to microseconds checkFromString("0.999999999 seconds", new CalendarInterval(0, 0, 999999)) checkFromString(".999999999 seconds", new CalendarInterval(0, 0, 999999)) - checkFromInvalidString("0.123456789123 seconds", "Error parsing interval string") + checkFromInvalidString("0.123456789123 seconds", "'0.123456789123' is out of range") } test("from year-month string") { @@ -174,7 +195,7 @@ class IntervalUtilsSuite extends SparkFunSuite { test("interval duration") { def duration(s: String, unit: TimeUnit, daysPerMonth: Int): Long = { - IntervalUtils.getDuration(fromString(s), unit, daysPerMonth) + IntervalUtils.getDuration(stringToInterval(UTF8String.fromString(s)), unit, daysPerMonth) } assert(duration("0 seconds", TimeUnit.MILLISECONDS, 31) === 0) @@ -193,7 +214,7 @@ class IntervalUtilsSuite extends SparkFunSuite { test("negative interval") { def isNegative(s: String, daysPerMonth: Int): Boolean = { - IntervalUtils.isNegative(fromString(s), daysPerMonth) + IntervalUtils.isNegative(stringToInterval(UTF8String.fromString(s)), daysPerMonth) } assert(isNegative("-1 months", 28)) @@ -269,33 +290,27 @@ class IntervalUtilsSuite extends SparkFunSuite { } test("justify days") { - assert(justifyDays(fromString("1 month 35 day")) === new CalendarInterval(2, 5, 0)) - assert(justifyDays(fromString("-1 month 35 day")) === new CalendarInterval(0, 5, 0)) - assert(justifyDays(fromString("1 month -35 day")) === new CalendarInterval(0, -5, 0)) - assert(justifyDays(fromString("-1 month -35 day")) === new CalendarInterval(-2, -5, 0)) - assert(justifyDays(fromString("-1 month 2 day")) === new CalendarInterval(0, -28, 0)) + checkFromStringWithFunc("1 month 35 day", 2, 5, 0, justifyDays) + checkFromStringWithFunc("-1 month 35 day", 0, 5, 0, justifyDays) + checkFromStringWithFunc("1 month -35 day", 0, -5, 0, justifyDays) + checkFromStringWithFunc("-1 month -35 day", -2, -5, 0, justifyDays) + checkFromStringWithFunc("-1 month 2 day", 0, -28, 0, justifyDays) } test("justify hours") { - assert(justifyHours(fromString("29 day 25 hour")) === - new CalendarInterval(0, 30, 1 * MICROS_PER_HOUR)) - assert(justifyHours(fromString("29 day -25 hour")) === - new CalendarInterval(0, 27, 23 * MICROS_PER_HOUR)) - assert(justifyHours(fromString("-29 day 25 hour")) === - new CalendarInterval(0, -27, -23 * MICROS_PER_HOUR)) - assert(justifyHours(fromString("-29 day -25 hour")) === - new CalendarInterval(0, -30, -1 * MICROS_PER_HOUR)) + checkFromStringWithFunc("29 day 25 hour", 0, 30, 1 * MICROS_PER_HOUR, justifyHours) + checkFromStringWithFunc("29 day -25 hour", 0, 27, 23 * MICROS_PER_HOUR, justifyHours) + checkFromStringWithFunc("-29 day 25 hour", 0, -27, -23 * MICROS_PER_HOUR, justifyHours) + checkFromStringWithFunc("-29 day -25 hour", 0, -30, -1 * MICROS_PER_HOUR, justifyHours) } 
test("justify interval") { - assert(justifyInterval(fromString("1 month 29 day 25 hour")) === - new CalendarInterval(2, 0, 1 * MICROS_PER_HOUR)) - assert(justifyInterval(fromString("-1 month 29 day -25 hour")) === - new CalendarInterval(0, -2, -1 * MICROS_PER_HOUR)) - assert(justifyInterval(fromString("1 month -29 day -25 hour")) === - new CalendarInterval(0, 0, -1 * MICROS_PER_HOUR)) - assert(justifyInterval(fromString("-1 month -29 day -25 hour")) === - new CalendarInterval(-2, 0, -1 * MICROS_PER_HOUR)) + checkFromStringWithFunc("1 month 29 day 25 hour", 2, 0, 1 * MICROS_PER_HOUR, justifyInterval) + checkFromStringWithFunc("-1 month 29 day -25 hour", 0, -2, -1 * MICROS_PER_HOUR, + justifyInterval) + checkFromStringWithFunc("1 month -29 day -25 hour", 0, 0, -1 * MICROS_PER_HOUR, justifyInterval) + checkFromStringWithFunc("-1 month -29 day -25 hour", -2, 0, -1 * MICROS_PER_HOUR, + justifyInterval) intercept[ArithmeticException](justifyInterval(new CalendarInterval(2, 0, Long.MaxValue))) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index aaa3f9dd71594..0fcd82276392c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -60,7 +60,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.storage.StorageLevel import org.apache.spark.unsafe.array.ByteArrayMethods -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} import org.apache.spark.util.Utils private[sql] object Dataset { @@ -725,7 +725,7 @@ class Dataset[T] private[sql]( def withWatermark(eventTime: String, delayThreshold: String): Dataset[T] = withTypedPlan { val parsedDelay = try { - IntervalUtils.fromString(delayThreshold) + IntervalUtils.stringToInterval(UTF8String.fromString(delayThreshold)) } catch { case e: IllegalArgumentException => throw new AnalysisException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/GroupStateImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/GroupStateImpl.scala index aac5da8104a8b..59ce7c3707b27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/GroupStateImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/GroupStateImpl.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{EventTimeTimeout, Processing import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.execution.streaming.GroupStateImpl._ import org.apache.spark.sql.streaming.{GroupState, GroupStateTimeout} -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.UTF8String /** @@ -160,7 +160,7 @@ private[sql] class GroupStateImpl[S] private( def getTimeoutTimestamp: Long = timeoutTimestamp private def parseDuration(duration: String): Long = { - val cal = IntervalUtils.fromString(duration) + val cal = IntervalUtils.stringToInterval(UTF8String.fromString(duration)) if (IntervalUtils.isNegative(cal)) { throw new IllegalArgumentException(s"Provided duration ($duration) is negative") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala index 2dd287cb734bf..1a27fe61d9602 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala @@ -24,6 +24,7 @@ import scala.concurrent.duration.Duration import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_DAY import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.streaming.Trigger +import org.apache.spark.unsafe.types.UTF8String private object Triggers { def validate(intervalMs: Long): Unit = { @@ -31,7 +32,7 @@ private object Triggers { } def convert(interval: String): Long = { - val cal = IntervalUtils.fromString(interval) + val cal = IntervalUtils.stringToInterval(UTF8String.fromString(interval)) if (cal.months != 0) { throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index b2185f8559f36..6cb13d0e08a6d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -770,7 +770,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assertError("select interval '23:61:15' hour to second", "minute 61 outside range [0, 59]") assertError("select interval '.1111111111' second", - "nanosecond 1111111111 outside range") + "'.1111111111' is out of range") } test("use native json_tuple instead of hive's UDTF in LATERAL VIEW") {
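
The net effect of this patch is that `IntervalUtils.fromString`/`safeFromString` (which round-tripped through `CatalystSqlParser.parseInterval` and the now-removed `singleInterval` grammar rule) are replaced by the hand-written `stringToInterval`, which now throws `IllegalArgumentException` with a descriptive message instead of returning null, plus a null-returning wrapper `safeStringToInterval` that `Cast` uses. A minimal sketch of the new entry points, assuming Spark's catalyst and unsafe modules are on the classpath; the expected behavior follows the tests above:

```scala
import org.apache.spark.sql.catalyst.util.IntervalUtils
import org.apache.spark.unsafe.types.UTF8String

// Strict variant: parses the string or throws IllegalArgumentException.
val ok = IntervalUtils.stringToInterval(UTF8String.fromString("interval 1 day 2 hours"))
assert(ok.days == 1 && ok.microseconds == 2 * 3600L * 1000000L)

// Safe variant: returns null instead of throwing, which is what CAST now relies on.
assert(IntervalUtils.safeStringToInterval(UTF8String.fromString("1 aour")) == null)

// The strict variant surfaces the parse failure with a specific message,
// e.g. "Error parsing '1 aour' to interval, invalid unit 'aour'".
try {
  IntervalUtils.stringToInterval(UTF8String.fromString("1 aour"))
} catch {
  case e: IllegalArgumentException => println(e.getMessage)
}
```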
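Likewise, the rewritten `visitMultiUnitsInterval` no longer maps unit tokens through `IntervalUnit.withName` and `fromUnitStrings`; it renders each value/unit pair back into interval-string syntax and delegates to the same `stringToInterval` parser, so plural handling and overflow checks live in one place. A sketch of that concatenation, with `fromUnitPairs` as a hypothetical standalone stand-in for the visitor:

```scala
import org.apache.spark.sql.catalyst.util.IntervalUtils
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}

// Render each (value, unit) pair as " <value> <unit>", concatenate, and parse once.
def fromUnitPairs(pairs: Seq[(String, String)]): CalendarInterval = {
  val kvs = pairs.map { case (v, u) => UTF8String.fromString(" " + v + " " + u) }
  IntervalUtils.stringToInterval(UTF8String.concat(kvs: _*))
}

val interval = fromUnitPairs(Seq("3" -> "years", "1" -> "month"))
assert(interval.months == 3 * 12 + 1) // plural "years" and singular "month" both parse
```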