From 6d0564c2e2343342e99867313a5f6b6dbabae230 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 9 Oct 2019 23:51:46 +0300 Subject: [PATCH 1/7] Make fromString tolerant to prefix --- .../spark/unsafe/types/CalendarInterval.java | 56 ++++++++++--------- .../unsafe/types/CalendarIntervalSuite.java | 11 +++- 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java index 911c5c7f646f4..bd1df505ff257 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java @@ -18,7 +18,6 @@ package org.apache.spark.unsafe.types; import java.io.Serializable; -import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -73,45 +72,50 @@ private static long toLong(String s) { * This method is case-insensitive. */ public static CalendarInterval fromString(String s) { - if (s == null) { - return null; - } - s = s.trim(); - Matcher m = p.matcher(s); - if (!m.matches() || s.compareToIgnoreCase("interval") == 0) { + try { + return fromCaseInsensitiveString(s); + } catch (IllegalArgumentException e) { return null; - } else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); } } /** - * Convert a string to CalendarInterval. Unlike fromString, this method can handle + * Convert a string to CalendarInterval. This method can handle * strings without the `interval` prefix and throws IllegalArgumentException * when the input string is not a valid interval. * * @throws IllegalArgumentException if the string is not a valid internal. */ public static CalendarInterval fromCaseInsensitiveString(String s) { - if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); + if (s == null) { + throw new IllegalArgumentException("Interval cannot be null"); } - String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); - String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; - CalendarInterval cal = fromString(interval); - if (cal == null) { + String trimmed = s.trim(); + if (trimmed.isEmpty()) { + throw new IllegalArgumentException("Interval cannot be blank"); + } + String prefix = "interval"; + String intervalStr = trimmed; + if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) { + intervalStr = prefix + " " + trimmed; + } else if (intervalStr.length() == prefix.length()) { + throw new IllegalArgumentException("Interval string must have time units"); + } + + Matcher m = p.matcher(intervalStr); + if (!m.matches()) { throw new IllegalArgumentException("Invalid interval: " + s); } - return cal; + + long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); + long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; + microseconds += toLong(m.group(4)) * MICROS_PER_DAY; + microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; + microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; + microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; + microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; + microseconds += toLong(m.group(9)); + return new CalendarInterval((int) months, microseconds); } public static long toLongWithRange(String fieldName, diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java index 6ccc65f7d1740..9cd689639fa2f 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java @@ -115,7 +115,9 @@ public void fromCaseInsensitiveStringTest() { fromCaseInsensitiveString(input); fail("Expected to throw an exception for the invalid input"); } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("cannot be null or blank")); + String msg = e.getMessage(); + if (input == null) assertTrue(msg.contains("cannot be null")); + else assertTrue(msg.contains("cannot be blank")); } } @@ -124,7 +126,12 @@ public void fromCaseInsensitiveStringTest() { fromCaseInsensitiveString(input); fail("Expected to throw an exception for the invalid input"); } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("Invalid interval")); + String msg = e.getMessage(); + if (input.trim().equalsIgnoreCase("interval")) { + assertTrue(msg.contains("Interval string must have time units")); + } else { + assertTrue(msg.contains("Invalid interval:")); + } } } } From 60d7d345f83f0708cd7ee1a17154a663c9b043d3 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 10 Oct 2019 00:08:16 +0300 Subject: [PATCH 2/7] Use Arrays.asList --- .../spark/unsafe/types/CalendarIntervalSuite.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java index 9cd689639fa2f..656fd372823ee 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java @@ -19,6 +19,8 @@ import org.junit.Test; +import java.util.Arrays; + import static org.junit.Assert.*; import static org.apache.spark.unsafe.types.CalendarInterval.*; @@ -275,11 +277,13 @@ public void subtractTest() { } private static void testSingleUnit(String unit, int number, int months, long microseconds) { - String input1 = "interval " + number + " " + unit; - String input2 = "interval " + number + " " + unit + "s"; - CalendarInterval result = new CalendarInterval(months, microseconds); - assertEquals(fromString(input1), result); - assertEquals(fromString(input2), result); + Arrays.asList("interval ", "").forEach(prefix -> { + String input1 = prefix + number + " " + unit; + String input2 = prefix + number + " " + unit + "s"; + CalendarInterval result = new CalendarInterval(months, microseconds); + assertEquals(fromString(input1), result); + assertEquals(fromString(input2), result); + }); } @Test From 9253cec0c20d1c5ea5a443456f3c8b2167d7c9ef Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 10 Oct 2019 00:19:26 +0300 Subject: [PATCH 3/7] Add test for missing prefix --- .../unsafe/types/CalendarIntervalSuite.java | 44 +++++++------------ 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java index 656fd372823ee..587071332ce47 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java @@ -74,36 +74,26 @@ public void fromStringTest() { testSingleUnit("millisecond", 3, 0, 3 * MICROS_PER_MILLI); testSingleUnit("microsecond", 3, 0, 3); - String input; - - input = "interval -5 years 23 month"; CalendarInterval result = new CalendarInterval(-5 * 12 + 23, 0); - assertEquals(fromString(input), result); - - input = "interval -5 years 23 month "; - assertEquals(fromString(input), result); - - input = " interval -5 years 23 month "; - assertEquals(fromString(input), result); + Arrays.asList( + "interval -5 years 23 month", + " -5 years 23 month", + "interval -5 years 23 month ", + " -5 years 23 month ", + " interval -5 years 23 month ").forEach(input -> + assertEquals(fromString(input), result) + ); // Error cases - input = "interval 3month 1 hour"; - assertNull(fromString(input)); - - input = "interval 3 moth 1 hour"; - assertNull(fromString(input)); - - input = "interval"; - assertNull(fromString(input)); - - input = "int"; - assertNull(fromString(input)); - - input = ""; - assertNull(fromString(input)); - - input = null; - assertNull(fromString(input)); + Arrays.asList( + "interval 3month 1 hour", + "3month 1 hour", + "interval 3 moth 1 hour", + "3 moth 1 hour", + "interval", + "int", + "", + null).forEach(input -> assertNull(fromString(input))); } @Test From eecfbf46dce8568eb29146237fd77d71f18cca80 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 10 Oct 2019 09:55:41 +0300 Subject: [PATCH 4/7] Test for cast --- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 96ef3a558b85b..fc7a0d3af4e28 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -672,6 +672,8 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { "interval 1 years 3 months -3 days") checkEvaluation(Cast(Literal("INTERVAL 1 Second 1 microsecond"), CalendarIntervalType), new CalendarInterval(0, 1000001)) + checkEvaluation(Cast(Literal("1 MONTH 1 Microsecond"), CalendarIntervalType), + new CalendarInterval(1, 1)) } test("cast string to boolean") { From e254ee55e7da85f87fc839954a30e1b4b3f17cf9 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 10 Oct 2019 10:13:48 +0300 Subject: [PATCH 5/7] Test for the interval constructor --- .../spark/sql/catalyst/parser/ExpressionParserSuite.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 5da2bf059758d..c2e80c639f43b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -432,8 +432,9 @@ class ExpressionParserSuite extends AnalysisTest { intercept("timestamP '2016-33-11 20:54:00.000'") // Interval. - assertEqual("InterVal 'interval 3 month 1 hour'", - Literal(CalendarInterval.fromString("interval 3 month 1 hour"))) + val intervalLiteral = Literal(CalendarInterval.fromString("interval 3 month 1 hour")) + assertEqual("InterVal 'interval 3 month 1 hour'", intervalLiteral) + assertEqual("INTERVAL '3 month 1 hour'", intervalLiteral) assertEqual("Interval 'interval 3 monthsss 1 hoursss'", Literal(null, CalendarIntervalType)) From 813897b697c4bbfc350587c162324e9d68c51eaa Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 10 Oct 2019 16:18:19 +0300 Subject: [PATCH 6/7] Add comments --- .../java/org/apache/spark/unsafe/types/CalendarInterval.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java index bd1df505ff257..28fb64f7cd0e0 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java @@ -96,7 +96,10 @@ public static CalendarInterval fromCaseInsensitiveString(String s) { } String prefix = "interval"; String intervalStr = trimmed; + // Checks the given interval string does not start with the `interval` prefix if (!intervalStr.regionMatches(true, 0, prefix, 0, prefix.length())) { + // Prepend `interval` if it does not present because + // the regular expression strictly require it. intervalStr = prefix + " " + trimmed; } else if (intervalStr.length() == prefix.length()) { throw new IllegalArgumentException("Interval string must have time units"); From e2c1352c66ca8a74d74e154513fe4b9546fc7537 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 12 Oct 2019 11:31:56 +0300 Subject: [PATCH 7/7] Regenerate literals.sql.out --- .../src/test/resources/sql-tests/results/literals.sql.out | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index e1e8d685e8787..aef23963da374 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -435,6 +435,6 @@ interval 3 years 1 hours -- !query 45 select interval '3 year 1 hour' -- !query 45 schema -struct +struct -- !query 45 output -NULL +interval 3 years 1 hours