From cdc7b2d55be0fc39ae947cd7f9ebdb6a6a67b51b Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 18 Nov 2019 17:33:17 +0800 Subject: [PATCH 1/4] [SPARK-29926][SQL] Fix weird interval string whose value end with dangling decial point --- .../org/apache/spark/sql/catalyst/util/IntervalUtils.scala | 7 ++++--- .../spark/sql/catalyst/util/IntervalUtilsSuite.scala | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 725ff81ef410e..c64fdae64c00a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -505,6 +505,7 @@ object IntervalUtils { var days: Int = 0 var microseconds: Long = 0 var fractionScale: Int = 0 + val validOriginFractionScale = (NANOS_PER_SECOND / 10).toInt var fraction: Int = 0 def trimToNextState(b: Byte, next: ParseState): Unit = { @@ -556,7 +557,7 @@ object IntervalUtils { isNegative = false case '.' => isNegative = false - fractionScale = (NANOS_PER_SECOND / 10).toInt + fractionScale = validOriginFractionScale i += 1 state = VALUE_FRACTIONAL_PART case _ => throwIAE( s"unrecognized number '$currentWord'") @@ -572,7 +573,7 @@ object IntervalUtils { } case ' ' => state = TRIM_BEFORE_UNIT case '.' => - fractionScale = (NANOS_PER_SECOND / 10).toInt + fractionScale = validOriginFractionScale state = VALUE_FRACTIONAL_PART case _ => throwIAE(s"invalid value '$currentWord'") } @@ -582,7 +583,7 @@ object IntervalUtils { case _ if '0' <= b && b <= '9' && fractionScale > 0 => fraction += (b - '0') * fractionScale fractionScale /= 10 - case ' ' => + case ' ' if fractionScale != validOriginFractionScale => fraction /= NANOS_PER_MICROS.toInt state = TRIM_BEFORE_UNIT case _ if '0' <= b && b <= '9' => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala index 73a2adbaec1db..3c57f8ef8b604 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala @@ -105,7 +105,7 @@ class IntervalUtilsSuite extends SparkFunSuite { checkFromString("1 day 10 day", new CalendarInterval(0, 11, 0)) // Only the seconds units can have the fractional part checkFromInvalidString("1.5 days", "'days' cannot have fractional part") - checkFromInvalidString("1. hour", "'hour' cannot have fractional part") + checkFromInvalidString("1. hour", "invalid value '1.'") checkFromInvalidString("1 hourX", "invalid unit 'hourx'") checkFromInvalidString("~1 hour", "unrecognized number '~1'") checkFromInvalidString("1 Mour", "invalid unit 'mour'") @@ -115,12 +115,12 @@ class IntervalUtilsSuite extends SparkFunSuite { checkFromInvalidString("2234567890 days", "integer overflow") checkFromInvalidString("\n", "Error parsing '\n' to interval") checkFromInvalidString("\t", "Error parsing '\t' to interval") - + checkFromInvalidString("1. seconds", "invalid value '1.'") + checkFromInvalidString(". seconds", "invalid value '.'") } test("string to interval: seconds with fractional part") { checkFromString("0.1 seconds", new CalendarInterval(0, 0, 100000)) - checkFromString("1. seconds", new CalendarInterval(0, 0, 1000000)) checkFromString("123.001 seconds", new CalendarInterval(0, 0, 123001000)) checkFromString("1.001001 seconds", new CalendarInterval(0, 0, 1001001)) checkFromString("1 minute 1.001001 seconds", new CalendarInterval(0, 0, 61001001)) From 42afa6fdf20143af3b5cb842b88cdd7d8df720f0 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 18 Nov 2019 21:52:06 +0800 Subject: [PATCH 2/4] naming --- .../apache/spark/sql/catalyst/util/IntervalUtils.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index c64fdae64c00a..d48b2df74c2ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -505,7 +505,7 @@ object IntervalUtils { var days: Int = 0 var microseconds: Long = 0 var fractionScale: Int = 0 - val validOriginFractionScale = (NANOS_PER_SECOND / 10).toInt + val initialFractionScale = (NANOS_PER_SECOND / 10).toInt var fraction: Int = 0 def trimToNextState(b: Byte, next: ParseState): Unit = { @@ -557,7 +557,7 @@ object IntervalUtils { isNegative = false case '.' => isNegative = false - fractionScale = validOriginFractionScale + fractionScale = initialFractionScale i += 1 state = VALUE_FRACTIONAL_PART case _ => throwIAE( s"unrecognized number '$currentWord'") @@ -573,7 +573,7 @@ object IntervalUtils { } case ' ' => state = TRIM_BEFORE_UNIT case '.' => - fractionScale = validOriginFractionScale + fractionScale = initialFractionScale state = VALUE_FRACTIONAL_PART case _ => throwIAE(s"invalid value '$currentWord'") } @@ -583,7 +583,7 @@ object IntervalUtils { case _ if '0' <= b && b <= '9' && fractionScale > 0 => fraction += (b - '0') * fractionScale fractionScale /= 10 - case ' ' if fractionScale != validOriginFractionScale => + case ' ' if fractionScale != initialFractionScale => fraction /= NANOS_PER_MICROS.toInt state = TRIM_BEFORE_UNIT case _ if '0' <= b && b <= '9' => From 982b63daa8c2fced82693d06b9951ba4eda13bcf Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 18 Nov 2019 23:15:10 +0800 Subject: [PATCH 3/4] nit --- .../org/apache/spark/sql/catalyst/util/IntervalUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index d48b2df74c2ba..e8e9974fb802d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -583,7 +583,7 @@ object IntervalUtils { case _ if '0' <= b && b <= '9' && fractionScale > 0 => fraction += (b - '0') * fractionScale fractionScale /= 10 - case ' ' if fractionScale != initialFractionScale => + case ' ' if fractionScale < initialFractionScale => fraction /= NANOS_PER_MICROS.toInt state = TRIM_BEFORE_UNIT case _ if '0' <= b && b <= '9' => From f6b0edd79aac4c415afc1bad3ca3c9cd6cf21cbc Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 19 Nov 2019 15:56:33 +0800 Subject: [PATCH 4/4] only fibid '. seconds' --- .../org/apache/spark/sql/catalyst/util/IntervalUtils.scala | 5 ++++- .../apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index e8e9974fb802d..46584b67097ce 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -507,6 +507,7 @@ object IntervalUtils { var fractionScale: Int = 0 val initialFractionScale = (NANOS_PER_SECOND / 10).toInt var fraction: Int = 0 + var pointPrefixed: Boolean = false def trimToNextState(b: Byte, next: ParseState): Unit = { b match { @@ -546,6 +547,7 @@ object IntervalUtils { // We preset the scale to an invalid value to track fraction presence in the UNIT_BEGIN // state. If we meet '.', the scale become valid for the VALUE_FRACTIONAL_PART state. fractionScale = -1 + pointPrefixed = false b match { case '-' => isNegative = true @@ -558,6 +560,7 @@ object IntervalUtils { case '.' => isNegative = false fractionScale = initialFractionScale + pointPrefixed = true i += 1 state = VALUE_FRACTIONAL_PART case _ => throwIAE( s"unrecognized number '$currentWord'") @@ -583,7 +586,7 @@ object IntervalUtils { case _ if '0' <= b && b <= '9' && fractionScale > 0 => fraction += (b - '0') * fractionScale fractionScale /= 10 - case ' ' if fractionScale < initialFractionScale => + case ' ' if !pointPrefixed || fractionScale < initialFractionScale => fraction /= NANOS_PER_MICROS.toInt state = TRIM_BEFORE_UNIT case _ if '0' <= b && b <= '9' => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala index 3c57f8ef8b604..ee3db0391ed00 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala @@ -105,7 +105,7 @@ class IntervalUtilsSuite extends SparkFunSuite { checkFromString("1 day 10 day", new CalendarInterval(0, 11, 0)) // Only the seconds units can have the fractional part checkFromInvalidString("1.5 days", "'days' cannot have fractional part") - checkFromInvalidString("1. hour", "invalid value '1.'") + checkFromInvalidString("1. hour", "'hour' cannot have fractional part") checkFromInvalidString("1 hourX", "invalid unit 'hourx'") checkFromInvalidString("~1 hour", "unrecognized number '~1'") checkFromInvalidString("1 Mour", "invalid unit 'mour'") @@ -115,12 +115,12 @@ class IntervalUtilsSuite extends SparkFunSuite { checkFromInvalidString("2234567890 days", "integer overflow") checkFromInvalidString("\n", "Error parsing '\n' to interval") checkFromInvalidString("\t", "Error parsing '\t' to interval") - checkFromInvalidString("1. seconds", "invalid value '1.'") checkFromInvalidString(". seconds", "invalid value '.'") } test("string to interval: seconds with fractional part") { checkFromString("0.1 seconds", new CalendarInterval(0, 0, 100000)) + checkFromString("1. seconds", new CalendarInterval(0, 0, 1000000)) checkFromString("123.001 seconds", new CalendarInterval(0, 0, 123001000)) checkFromString("1.001001 seconds", new CalendarInterval(0, 0, 1001001)) checkFromString("1 minute 1.001001 seconds", new CalendarInterval(0, 0, 61001001))