From 29e3da1305a42ea0519b2337266a1b7a625ba530 Mon Sep 17 00:00:00 2001 From: Youjun Yuan Date: Thu, 12 Jan 2023 18:07:24 +0800 Subject: [PATCH 1/2] Add benchmark for DateTimeUtils.parseDate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark Mode Cnt Score Error Units BenchmarkParseDate.parseDate thrpt 10 48.887 ± 3.222 ops/ms --- .../io/trino/util/BenchmarkParseDate.java | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 core/trino-main/src/test/java/io/trino/util/BenchmarkParseDate.java diff --git a/core/trino-main/src/test/java/io/trino/util/BenchmarkParseDate.java b/core/trino-main/src/test/java/io/trino/util/BenchmarkParseDate.java new file mode 100644 index 00000000000..d426b7f8ed2 --- /dev/null +++ b/core/trino-main/src/test/java/io/trino/util/BenchmarkParseDate.java @@ -0,0 +1,73 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.util; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.RunnerException; + +import java.util.concurrent.TimeUnit; + +import static io.trino.jmh.Benchmarks.benchmark; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static org.openjdk.jmh.annotations.Mode.Throughput; +import static org.openjdk.jmh.annotations.Scope.Thread; + +@State(Thread) +@OutputTimeUnit(MILLISECONDS) +@BenchmarkMode(Throughput) +@Fork(1) +@Warmup(iterations = 5, time = 1000, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 10, time = 1000, timeUnit = TimeUnit.MILLISECONDS) +public class BenchmarkParseDate +{ + @Benchmark + public void parseDate(BenchmarkData data, Blackhole blackhole) + { + for (String dt : data.dates) { + blackhole.consume(DateTimeUtils.parseDate(dt)); + } + } + + @State(Thread) + public static class BenchmarkData + { + String[] dates; + + @Setup + public void setup() + { + dates = new String[100]; + // use 100 consecutive dates starting from 2023-01-01 + String startDate = "2023-01-01"; + int days = DateTimeUtils.parseDate(startDate); + for (int i = 0; i < dates.length; i++) { + dates[i] = DateTimeUtils.printDate(days + i); + } + } + } + + public static void main(String[] args) + throws RunnerException + { + benchmark(BenchmarkParseDate.class).run(); + } +} From d3cf9c603764c3c77e92a0633fb255e885853fde Mon Sep 17 00:00:00 2001 From: Youjun Yuan Date: Thu, 12 Jan 2023 18:09:04 +0800 Subject: [PATCH 2/2] Optimize date parsing for format yyyy-MM-dd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Optimized the date parsing if it is in the format yyyy-MM-dd. Below are the benchmark results before and after the change: After: Benchmark Mode Cnt Score Error Units BenchmarkParseDate.parseDate thrpt 10 762.769 ± 35.198 ops/ms Before: Benchmark Mode Cnt Score Error Units BenchmarkParseDate.parseDate thrpt 10 48.887 ± 3.222 ops/ms --- .../operator/scalar/DateTimeFunctions.java | 15 ++++ .../java/io/trino/type/DateOperators.java | 4 +- .../java/io/trino/util/DateTimeUtils.java | 61 +++++++++++++++ .../java/io/trino/util/TestDateTimeUtils.java | 77 +++++++++++++++++++ 4 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 core/trino-main/src/test/java/io/trino/util/TestDateTimeUtils.java diff --git a/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java b/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java index 0d2b075edc2..d5638e54a0b 100644 --- a/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java +++ b/core/trino-main/src/main/java/io/trino/operator/scalar/DateTimeFunctions.java @@ -15,6 +15,7 @@ import io.airlift.concurrent.ThreadLocalCache; import io.airlift.slice.Slice; +import io.airlift.slice.Slices; import io.airlift.units.Duration; import io.trino.operator.scalar.timestamptz.CurrentTimestamp; import io.trino.spi.TrinoException; @@ -29,6 +30,7 @@ import io.trino.spi.type.StandardTypes; import io.trino.spi.type.TimeZoneKey; import io.trino.type.DateTimes; +import io.trino.util.DateTimeUtils; import org.joda.time.DateTime; import org.joda.time.DateTimeField; import org.joda.time.Days; @@ -40,6 +42,7 @@ import org.joda.time.format.ISODateTimeFormat; import java.math.BigInteger; +import java.time.DateTimeException; import java.time.LocalDateTime; import java.time.ZonedDateTime; import java.time.format.DateTimeParseException; @@ -52,6 +55,7 @@ import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; import static io.trino.spi.type.Int128Math.rescale; import 
static io.trino.spi.type.TimeZoneKey.getTimeZoneKeyForOffset; +import static io.trino.spi.type.Timestamps.MILLISECONDS_PER_DAY; import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_SECOND; import static io.trino.type.DateTimes.PICOSECONDS_PER_NANOSECOND; import static io.trino.type.DateTimes.PICOSECONDS_PER_SECOND; @@ -85,6 +89,7 @@ public final class DateTimeFunctions private static final int MILLISECONDS_IN_HOUR = 60 * MILLISECONDS_IN_MINUTE; private static final int MILLISECONDS_IN_DAY = 24 * MILLISECONDS_IN_HOUR; private static final int PIVOT_YEAR = 2020; // yy = 70 will correspond to 1970 but 69 to 2069 + private static final Slice ISO_8601_DATE_FORMAT = Slices.utf8Slice("%Y-%m-%d"); private DateTimeFunctions() {} @@ -370,6 +375,16 @@ public static Slice dateFormat(ISOChronology chronology, Locale locale, long tim @SqlType("timestamp(3)") // TODO: increase precision? public static long dateParse(ConnectorSession session, @SqlType("varchar(x)") Slice dateTime, @SqlType("varchar(y)") Slice formatString) { + if (ISO_8601_DATE_FORMAT.equals(formatString)) { + try { + long days = DateTimeUtils.parseDate(dateTime.toStringUtf8()); + return scaleEpochMillisToMicros(days * MILLISECONDS_PER_DAY); + } + catch (IllegalArgumentException | ArithmeticException | DateTimeException e) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, e); + } + } + DateTimeFormatter formatter = DATETIME_FORMATTER_CACHE.get(formatString) .withZoneUTC() .withLocale(session.getLocale()); diff --git a/core/trino-main/src/main/java/io/trino/type/DateOperators.java b/core/trino-main/src/main/java/io/trino/type/DateOperators.java index b2f89b2bdd8..39e488cc2af 100644 --- a/core/trino-main/src/main/java/io/trino/type/DateOperators.java +++ b/core/trino-main/src/main/java/io/trino/type/DateOperators.java @@ -22,6 +22,8 @@ import io.trino.spi.function.SqlType; import io.trino.spi.type.StandardTypes; +import java.time.DateTimeException; + import static io.airlift.slice.SliceUtf8.trim; import 
static io.airlift.slice.Slices.utf8Slice; import static io.trino.spi.StandardErrorCode.INVALID_CAST_ARGUMENT; @@ -60,7 +62,7 @@ public static long castFromVarchar(@SqlType("varchar(x)") Slice value) try { return parseDate(trim(value).toStringUtf8()); } - catch (IllegalArgumentException | ArithmeticException e) { + catch (IllegalArgumentException | ArithmeticException | DateTimeException e) { throw new TrinoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to date: " + value.toStringUtf8(), e); } } diff --git a/core/trino-main/src/main/java/io/trino/util/DateTimeUtils.java b/core/trino-main/src/main/java/io/trino/util/DateTimeUtils.java index 0d568941d4e..7e0ca843011 100644 --- a/core/trino-main/src/main/java/io/trino/util/DateTimeUtils.java +++ b/core/trino-main/src/main/java/io/trino/util/DateTimeUtils.java @@ -18,6 +18,7 @@ import io.trino.spi.TrinoException; import io.trino.spi.type.TimeZoneKey; import io.trino.sql.tree.IntervalLiteral.IntervalField; +import com.google.common.annotations.VisibleForTesting; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.DurationFieldType; @@ -35,10 +36,13 @@ import org.joda.time.format.PeriodFormatterBuilder; import org.joda.time.format.PeriodParser; +import java.time.DateTimeException; +import java.time.LocalDate; import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Optional; +import java.util.OptionalInt; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; @@ -67,9 +71,66 @@ public static int parseDate(String value) // - the required format is 'YYYY-MM-DD' // - all components should be unsigned numbers // https://github.com/trinodb/trino/issues/10677 + + OptionalInt days = parseIfIso8601DateFormat(value); + if (days.isPresent()) { + return days.getAsInt(); + } return toIntExact(TimeUnit.MILLISECONDS.toDays(DATE_FORMATTER.parseMillis(value))); } + /** + * Parse date if it is in the format {@code yyyy-MM-dd}. 
+ * + * @return the number of days since 1970-01-01 or empty when value does not match the expected format + * @throws DateTimeException when value matches the expected format but is invalid (month or day number out of range) + */ + @VisibleForTesting + static OptionalInt parseIfIso8601DateFormat(String value) + { + if (value.length() != 10 || value.charAt(4) != '-' || value.charAt(7) != '-') { + return OptionalInt.empty(); + } + + OptionalInt year = parseIntSimple(value, 0, 4); + if (year.isEmpty()) { + return OptionalInt.empty(); + } + + OptionalInt month = parseIntSimple(value, 5, 2); + if (month.isEmpty()) { + return OptionalInt.empty(); + } + + OptionalInt day = parseIntSimple(value, 8, 2); + if (day.isEmpty()) { + return OptionalInt.empty(); + } + + LocalDate date = LocalDate.of(year.getAsInt(), month.getAsInt(), day.getAsInt()); + return OptionalInt.of(toIntExact(date.toEpochDay())); + } + + /** + * Parse non-negative integer with radix 10. + * + * @return parsed value or empty if any non-digit character is found + */ + private static OptionalInt parseIntSimple(String input, int offset, int length) + { + checkArgument(length > 0, "Invalid length %s", length); + + int result = 0; + for (int i = 0; i < length; i++) { + int digit = input.charAt(offset + i) - '0'; + if (digit < 0 || digit > 9) { + return OptionalInt.empty(); + } + result = result * 10 + digit; + } + return OptionalInt.of(result); + } + public static String printDate(int days) { return DATE_FORMATTER.print(TimeUnit.DAYS.toMillis(days)); diff --git a/core/trino-main/src/test/java/io/trino/util/TestDateTimeUtils.java b/core/trino-main/src/test/java/io/trino/util/TestDateTimeUtils.java new file mode 100644 index 00000000000..1857e256cea --- /dev/null +++ b/core/trino-main/src/test/java/io/trino/util/TestDateTimeUtils.java @@ -0,0 +1,77 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.util; + +import org.testng.annotations.Test; + +import java.time.DateTimeException; + +import static io.trino.util.DateTimeUtils.parseIfIso8601DateFormat; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.testng.Assert.assertEquals; + +public class TestDateTimeUtils +{ + @Test + @SuppressWarnings("OptionalGetWithoutIsPresent") + public void testParseIfIso8601DateFormat() + { + // valid dates + assertEquals(parseIfIso8601DateFormat("1970-01-01").getAsInt(), 0, "1970-01-01"); + assertEquals(parseIfIso8601DateFormat("1970-02-01").getAsInt(), 31, "1970-02-01"); + assertEquals(parseIfIso8601DateFormat("1969-12-01").getAsInt(), -31, "1969-12-01"); + assertEquals(parseIfIso8601DateFormat("2022-02-28").getAsInt(), 19051, "2022-02-28"); + assertEquals(parseIfIso8601DateFormat("0000-01-01").getAsInt(), -719528, "0000-01-01"); + assertEquals(parseIfIso8601DateFormat("9999-12-31").getAsInt(), 2932896, "9999-12-31"); + + // format invalid + // invalid length + assertThat(parseIfIso8601DateFormat("1970-2-01")).isEmpty(); + // invalid year0 + assertThat(parseIfIso8601DateFormat("a970-02-10")).isEmpty(); + // invalid year1 + assertThat(parseIfIso8601DateFormat("1p70-02-10")).isEmpty(); + // invalid year2 + assertThat(parseIfIso8601DateFormat("19%0-02-10")).isEmpty(); + // invalid year3 + assertThat(parseIfIso8601DateFormat("197o-02-10")).isEmpty(); + // invalid dash0 + assertThat(parseIfIso8601DateFormat("1970_02-01")).isEmpty(); + // invalid 
month0 + assertThat(parseIfIso8601DateFormat("1970- 2-01")).isEmpty(); + // invalid month1 + assertThat(parseIfIso8601DateFormat("1970-3.-01")).isEmpty(); + // invalid dash1 + assertThat(parseIfIso8601DateFormat("1970-02/01")).isEmpty(); + // invalid day0 + assertThat(parseIfIso8601DateFormat("1970-02-/1")).isEmpty(); + // invalid day1 + assertThat(parseIfIso8601DateFormat("1970-12-0l")).isEmpty(); + + assertThat(parseIfIso8601DateFormat("1970/02/01")).isEmpty(); + assertThat(parseIfIso8601DateFormat("Dec 24 2022")).isEmpty(); + + // format ok, but illegal value + assertThatThrownBy(() -> parseIfIso8601DateFormat("2022-02-29")) + .isInstanceOf(DateTimeException.class) + .hasMessage("Invalid date 'February 29' as '2022' is not a leap year"); + assertThatThrownBy(() -> parseIfIso8601DateFormat("1970-32-01")) + .isInstanceOf(DateTimeException.class) + .hasMessage("Invalid value for MonthOfYear (valid values 1 - 12): 32"); + assertThatThrownBy(() -> parseIfIso8601DateFormat("1970-02-41")) + .isInstanceOf(DateTimeException.class) + .hasMessage("Invalid value for DayOfMonth (valid values 1 - 28/31): 41"); + } +}