From 48b8a5c7562b0b200bd583a592003a960626b409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Wa=C5=9B?= Date: Sat, 28 Dec 2024 11:52:46 +0100 Subject: [PATCH 1/8] Avoid parsing min and max twice --- .../trino/plugin/faker/FakerColumnHandle.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java index 941103460d22..96d870eafb8e 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java @@ -63,18 +63,16 @@ public static FakerColumnHandle of(int columnId, ColumnMetadata column, double d if (generator != null && !isCharacterColumn(column)) { throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property can only be set for CHAR, VARCHAR or VARBINARY columns".formatted(GENERATOR_PROPERTY)); } - // only parse min, max, and options to validate literals - FakerColumnHandle needs to be serializable, - // and some internal Trino types are not (Int128, LongTimestamp, LongTimestampWithTimeZone), so they cannot be stored in the handle as native types - String min = (String) column.getProperties().get(MIN_PROPERTY); + Object min; try { - Literal.parse(min, column.getType()); + min = Literal.parse((String) column.getProperties().get(MIN_PROPERTY), column.getType()); } catch (IllegalArgumentException e) { throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property must be a valid %s literal".formatted(MIN_PROPERTY, column.getType().getDisplayName()), e); } - String max = (String) column.getProperties().get(MAX_PROPERTY); + Object max; try { - Literal.parse(max, column.getType()); + max = Literal.parse((String) column.getProperties().get(MAX_PROPERTY), column.getType()); } catch (IllegalArgumentException e) { throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property must be a valid %s literal".formatted(MAX_PROPERTY, column.getType().getDisplayName()), e); @@ -116,19 +114,19 @@ private static boolean isCharacterColumn(ColumnMetadata column) return column.getType() instanceof CharType || column.getType() instanceof VarcharType || column.getType() instanceof VarbinaryType; } - private static Range range(Type type, String min, String max) + private static Range range(Type type, Object min, Object max) { requireNonNull(type, "type is null"); if (min == null && max == null) { return Range.all(type); } if (max == null) { - return Range.greaterThanOrEqual(type, Literal.parse(min, type)); + return Range.greaterThanOrEqual(type, min); } if (min == null) { - return Range.lessThanOrEqual(type, Literal.parse(max, type)); + return Range.lessThanOrEqual(type, max); } - return Range.range(type, Literal.parse(min, type), true, Literal.parse(max, type), true); + return Range.range(type, min, true, max, true); } private static List strings(Collection values) From 8550bd4ec944cc4327c562f8791337063438f174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Wa=C5=9B?= Date: Sat, 28 Dec 2024 11:54:38 +0100 Subject: [PATCH 2/8] Avoid and/or in Faker docs --- docs/src/main/sphinx/connector/faker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/main/sphinx/connector/faker.md b/docs/src/main/sphinx/connector/faker.md index 904307253323..d5fb56791659 100644 --- a/docs/src/main/sphinx/connector/faker.md +++ b/docs/src/main/sphinx/connector/faker.md @@ -175,7 +175,7 @@ Faker supports the following non-character types: - `UUID` You can not use generator expressions for non-character-based columns. To limit -their data range, set the `min` and/or `max` column properties - see +their data range, set the `min` and `max` column properties - see [](faker-usage). ### Unsupported types From f277994326c540b75ada379c2f96b6c3e8e96eb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Wa=C5=9B?= Date: Sat, 28 Dec 2024 13:39:09 +0100 Subject: [PATCH 3/8] Refactor FakerPageSource to have fewer faker references --- .../trino/plugin/faker/FakerPageSource.java | 89 +++++++++++++++---- 1 file changed, 70 insertions(+), 19 deletions(-) diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java index d911ebc10db5..41f1c638dc40 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java @@ -41,6 +41,7 @@ import io.trino.type.IpAddressType; import net.datafaker.Faker; +import java.math.BigDecimal; import java.math.BigInteger; import java.net.Inet4Address; import java.net.UnknownHostException; @@ -111,6 +112,8 @@ class FakerPageSource private final Random random; private final Faker faker; + private final SentenceGenerator sentenceGenerator; + private final BoundedSentenceGenerator boundedSentenceGenerator; private final long limit; private final List generators; private long completedRows; @@ -128,6 +131,8 @@ class FakerPageSource { this.faker = requireNonNull(faker, "faker is null"); this.random = requireNonNull(random, "random is null"); + this.sentenceGenerator = () -> Slices.utf8Slice(faker.lorem().sentence(3 + random.nextInt(38))); + this.boundedSentenceGenerator = (maxLength) -> Slices.utf8Slice(faker.lorem().maxLengthSentence(maxLength)); List types = requireNonNull(columns, "columns is null") .stream() .map(FakerColumnHandle::type) @@ -308,7 +313,7 @@ private Generator randomValueGenerator(FakerColumnHandle handle) if (!range.isAll()) { throw new TrinoException(INVALID_ROW_FILTER, "Predicates for varbinary columns are not supported"); } - return (blockBuilder) -> varType.writeSlice(blockBuilder, Slices.utf8Slice(faker.lorem().sentence(3 + random.nextInt(38)))); + return (blockBuilder) -> varType.writeSlice(blockBuilder, sentenceGenerator.get()); } if (type instanceof VarcharType varcharType) { if (!range.isAll()) { @@ -316,15 +321,15 @@ private Generator randomValueGenerator(FakerColumnHandle handle) } if (varcharType.getLength().isPresent()) { int length = varcharType.getLength().get(); - return (blockBuilder) -> varcharType.writeSlice(blockBuilder, Slices.utf8Slice(faker.lorem().maxLengthSentence(random.nextInt(length)))); + return (blockBuilder) -> varcharType.writeSlice(blockBuilder, boundedSentenceGenerator.get(random.nextInt(length))); } - return (blockBuilder) -> varcharType.writeSlice(blockBuilder, Slices.utf8Slice(faker.lorem().sentence(3 + random.nextInt(38)))); + return (blockBuilder) -> varcharType.writeSlice(blockBuilder, sentenceGenerator.get()); } if (type instanceof CharType charType) { if (!range.isAll()) { throw new TrinoException(INVALID_ROW_FILTER, "Predicates for char columns are not supported"); } - return (blockBuilder) -> charType.writeSlice(blockBuilder, Slices.utf8Slice(faker.lorem().maxLengthSentence(charType.getLength()))); + return (blockBuilder) -> charType.writeSlice(blockBuilder, boundedSentenceGenerator.get(charType.getLength())); } // not supported: ROW, ARRAY, MAP, JSON if (type instanceof IpAddressType) { @@ -435,7 +440,7 @@ private long generateLong(Range range, long factor) private long generateLongDefaults(Range range, long factor, long min, long max) { - return faker.number().numberBetween( + return numberBetween( roundDiv((long) range.getLowValue().orElse(min), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), // TODO does the inclusion only apply to positive numbers? roundDiv((long) range.getHighValue().orElse(max), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)) * factor; @@ -443,21 +448,21 @@ private long generateLongDefaults(Range range, long factor, long min, long max) private int generateInt(Range range) { - return (int) faker.number().numberBetween( + return (int) numberBetween( (long) range.getLowValue().orElse((long) Integer.MIN_VALUE) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), (long) range.getHighValue().orElse((long) Integer.MAX_VALUE) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); } private short generateShort(Range range) { - return (short) faker.number().numberBetween( + return (short) numberBetween( (long) range.getLowValue().orElse((long) Short.MIN_VALUE) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), (long) range.getHighValue().orElse((long) Short.MAX_VALUE) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); } private byte generateTiny(Range range) { - return (byte) faker.number().numberBetween( + return (byte) numberBetween( (long) range.getLowValue().orElse((long) Byte.MIN_VALUE) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), (long) range.getHighValue().orElse((long) Byte.MAX_VALUE) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); } @@ -550,7 +555,7 @@ private Generator timestampGenerator(Range range, TimestampType tzType) LongTimestamp finalLow = low; LongTimestamp finalHigh = high; return (blockBuilder) -> { - long epochMicros = faker.number().numberBetween(finalLow.getEpochMicros(), finalHigh.getEpochMicros()); + long epochMicros = numberBetween(finalLow.getEpochMicros(), finalHigh.getEpochMicros()); if (tzType.getPrecision() <= 6) { epochMicros *= factor; tzType.writeObject(blockBuilder, new LongTimestamp(epochMicros * factor, 0)); @@ -558,17 +563,17 @@ private Generator timestampGenerator(Range range, TimestampType tzType) } int picosOfMicro; if (epochMicros == finalLow.getEpochMicros()) { - picosOfMicro = faker.number().numberBetween( + picosOfMicro = numberBetween( finalLow.getPicosOfMicro(), finalLow.getEpochMicros() == finalHigh.getEpochMicros() ? finalHigh.getPicosOfMicro() : (int) POWERS_OF_TEN[tzType.getPrecision() - 6] - 1); } else if (epochMicros == finalHigh.getEpochMicros()) { - picosOfMicro = faker.number().numberBetween(0, finalHigh.getPicosOfMicro()); + picosOfMicro = numberBetween(0, finalHigh.getPicosOfMicro()); } else { - picosOfMicro = faker.number().numberBetween(0, (int) POWERS_OF_TEN[tzType.getPrecision() - 6] - 1); + picosOfMicro = numberBetween(0, (int) POWERS_OF_TEN[tzType.getPrecision() - 6] - 1); } tzType.writeObject(blockBuilder, new LongTimestamp(epochMicros, picosOfMicro * factor)); }; @@ -584,7 +589,7 @@ private Generator timestampWithTimeZoneGenerator(Range range, TimestampWithTimeZ .orElse(TimeZoneKey.UTC_KEY)); long factor = POWERS_OF_TEN[3 - tzType.getPrecision()]; return (blockBuilder) -> { - long millis = faker.number().numberBetween( + long millis = numberBetween( roundDiv(unpackMillisUtc((long) range.getLowValue().orElse(Long.MIN_VALUE)), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), roundDiv(unpackMillisUtc((long) range.getHighValue().orElse(Long.MAX_VALUE)), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)) * factor; tzType.writeLong(blockBuilder, packDateTimeWithZone(millis, defaultTZ)); @@ -616,20 +621,20 @@ private Generator timestampWithTimeZoneGenerator(Range range, TimestampWithTimeZ LongTimestampWithTimeZone finalLow = low; LongTimestampWithTimeZone finalHigh = high; return (blockBuilder) -> { - long millis = faker.number().numberBetween(finalLow.getEpochMillis(), finalHigh.getEpochMillis()); + long millis = numberBetween(finalLow.getEpochMillis(), finalHigh.getEpochMillis()); int picosOfMilli; if (millis == finalLow.getEpochMillis()) { - picosOfMilli = faker.number().numberBetween( + picosOfMilli = numberBetween( finalLow.getPicosOfMilli(), finalLow.getEpochMillis() == finalHigh.getEpochMillis() ? finalHigh.getPicosOfMilli() : (int) POWERS_OF_TEN[tzType.getPrecision() - 3] - 1); } else if (millis == finalHigh.getEpochMillis()) { - picosOfMilli = faker.number().numberBetween(0, finalHigh.getPicosOfMilli()); + picosOfMilli = numberBetween(0, finalHigh.getPicosOfMilli()); } else { - picosOfMilli = faker.number().numberBetween(0, (int) POWERS_OF_TEN[tzType.getPrecision() - 3] - 1); + picosOfMilli = numberBetween(0, (int) POWERS_OF_TEN[tzType.getPrecision() - 3] - 1); } tzType.writeObject(blockBuilder, fromEpochMillisAndFraction(millis, picosOfMilli * factor, defaultTZ)); }; @@ -647,7 +652,7 @@ private Generator timeWithTimeZoneGenerator(Range range, TimeWithTimeZoneType ti long low = roundDiv(range.getLowValue().map(v -> unpackTimeNanos((long) v)).orElse(0L), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); long high = roundDiv(range.getHighValue().map(v -> unpackTimeNanos((long) v)).orElse(NANOSECONDS_PER_DAY), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); return (blockBuilder) -> { - long nanos = faker.number().numberBetween(low, high) * factor; + long nanos = numberBetween(low, high) * factor; timeType.writeLong(blockBuilder, packTimeWithTimeZone(nanos, offsetMinutes)); }; } @@ -667,11 +672,45 @@ private Generator timeWithTimeZoneGenerator(Range range, TimeWithTimeZoneType ti long longLow = roundDiv(low.getPicoseconds(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); long longHigh = roundDiv(high.getPicoseconds(), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); return (blockBuilder) -> { - long picoseconds = faker.number().numberBetween(longLow, longHigh) * factor; + long picoseconds = numberBetween(longLow, longHigh) * factor; timeType.writeObject(blockBuilder, new LongTimeWithTimeZone(picoseconds, offsetMinutes)); }; } + private int numberBetween(int min, int max) + { + if (min == max) { + return min; + } + final int realMin = Math.min(min, max); + final int realMax = Math.max(min, max); + final int amplitude = realMax - realMin; + if (amplitude >= 0) { + return random.nextInt(amplitude) + realMin; + } + // handle overflow + return (int) numberBetween(realMin, (long) realMax); + } + + private long numberBetween(long min, long max) + { + if (min == max) { + return min; + } + final long realMin = Math.min(min, max); + final long realMax = Math.max(min, max); + final long amplitude = realMax - realMin; + if (amplitude >= 0) { + return random.nextLong(amplitude) + realMin; + } + // handle overflow + final BigDecimal bigMin = BigDecimal.valueOf(min); + final BigDecimal bigMax = BigDecimal.valueOf(max); + final BigDecimal randomValue = BigDecimal.valueOf(random.nextDouble()); + + return bigMin.add(bigMax.subtract(bigMin).multiply(randomValue)).longValue(); + } + private Generator generateIpV4(Range range) { if (!range.isAll()) { @@ -726,4 +765,16 @@ private interface Generator { void accept(BlockBuilder blockBuilder); } + + @FunctionalInterface + private interface SentenceGenerator + { + Slice get(); + } + + @FunctionalInterface + private interface BoundedSentenceGenerator + { + Slice get(int maxLength); + } } From b438a35710225d79ebe4b7c6f6bb02f3770091a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Wa=C5=9B?= Date: Sat, 28 Dec 2024 18:12:56 +0100 Subject: [PATCH 4/8] Extract typed ranges in Faker's page source --- .../trino/plugin/faker/FakerPageSource.java | 473 ++++++++++-------- 1 file changed, 270 insertions(+), 203 deletions(-) diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java index 41f1c638dc40..e2b671f35bd8 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java @@ -250,9 +250,9 @@ private Generator constraintedValueGenerator(FakerColumnHandle handle) private Generator randomValueGenerator(FakerColumnHandle handle) { - Range range = handle.domain().getValues().getRanges().getSpan(); + Range genericRange = handle.domain().getValues().getRanges().getSpan(); if (handle.generator() != null) { - if (!range.isAll()) { + if (!genericRange.isAll()) { throw new TrinoException(INVALID_ROW_FILTER, "Predicates for columns with a generator expression are not supported"); } return (blockBuilder) -> VARCHAR.writeSlice(blockBuilder, Slices.utf8Slice(faker.expression(handle.generator()))); @@ -260,63 +260,73 @@ private Generator randomValueGenerator(FakerColumnHandle handle) Type type = handle.type(); // check every type in order defined in StandardTypes if (BIGINT.equals(type)) { - return (blockBuilder) -> BIGINT.writeLong(blockBuilder, generateLong(range, 1)); + LongRange range = LongRange.of(genericRange); + return (blockBuilder) -> BIGINT.writeLong(blockBuilder, numberBetween(range.low, range.high)); } if (INTEGER.equals(type)) { - return (blockBuilder) -> INTEGER.writeLong(blockBuilder, generateInt(range)); + IntRange range = IntRange.of(genericRange); + return (blockBuilder) -> INTEGER.writeLong(blockBuilder, numberBetween(range.low, range.high)); } if (SMALLINT.equals(type)) { - return (blockBuilder) -> SMALLINT.writeLong(blockBuilder, generateShort(range)); + IntRange range = IntRange.of(genericRange, Short.MIN_VALUE, Short.MAX_VALUE); + return (blockBuilder) -> SMALLINT.writeLong(blockBuilder, numberBetween(range.low, range.high)); } if (TINYINT.equals(type)) { - return (blockBuilder) -> TINYINT.writeLong(blockBuilder, generateTiny(range)); + IntRange range = IntRange.of(genericRange, Byte.MIN_VALUE, Byte.MAX_VALUE); + return (blockBuilder) -> TINYINT.writeLong(blockBuilder, numberBetween(range.low, range.high)); } if (BOOLEAN.equals(type)) { - if (!range.isAll()) { + if (!genericRange.isAll()) { throw new TrinoException(INVALID_ROW_FILTER, "Range or not a single value predicates for boolean columns are not supported"); } return (blockBuilder) -> BOOLEAN.writeBoolean(blockBuilder, random.nextBoolean()); } if (DATE.equals(type)) { - return (blockBuilder) -> DATE.writeLong(blockBuilder, generateInt(range)); + IntRange range = IntRange.of(genericRange); + return (blockBuilder) -> DATE.writeLong(blockBuilder, numberBetween(range.low, range.high)); } if (type instanceof DecimalType decimalType) { - return decimalGenerator(range, decimalType); + return decimalGenerator(genericRange, decimalType); } if (REAL.equals(type)) { - return (blockBuilder) -> REAL.writeLong(blockBuilder, floatToRawIntBits(generateFloat(range))); + FloatRange range = FloatRange.of(genericRange); + return (blockBuilder) -> REAL.writeLong(blockBuilder, floatToRawIntBits(range.low + (range.high - range.low) * random.nextFloat())); } if (DOUBLE.equals(type)) { - return (blockBuilder) -> DOUBLE.writeDouble(blockBuilder, generateDouble(range)); + DoubleRange range = DoubleRange.of(genericRange); + return (blockBuilder) -> DOUBLE.writeDouble(blockBuilder, range.low + (range.high - range.low) * random.nextDouble()); } // not supported: HYPER_LOG_LOG, QDIGEST, TDIGEST, P4_HYPER_LOG_LOG if (INTERVAL_DAY_TIME.equals(type)) { - return (blockBuilder) -> INTERVAL_DAY_TIME.writeLong(blockBuilder, generateLong(range, 1)); + LongRange range = LongRange.of(genericRange); + return (blockBuilder) -> INTERVAL_DAY_TIME.writeLong(blockBuilder, numberBetween(range.low, range.high)); } if (INTERVAL_YEAR_MONTH.equals(type)) { - return (blockBuilder) -> INTERVAL_YEAR_MONTH.writeLong(blockBuilder, generateInt(range)); + IntRange range = IntRange.of(genericRange); + return (blockBuilder) -> INTERVAL_YEAR_MONTH.writeLong(blockBuilder, numberBetween(range.low, range.high)); } if (type instanceof TimestampType) { - return timestampGenerator(range, (TimestampType) type); + return timestampGenerator(genericRange, (TimestampType) type); } if (type instanceof TimestampWithTimeZoneType) { - return timestampWithTimeZoneGenerator(range, (TimestampWithTimeZoneType) type); + return timestampWithTimeZoneGenerator(genericRange, (TimestampWithTimeZoneType) type); } if (type instanceof TimeType timeType) { long factor = POWERS_OF_TEN[12 - timeType.getPrecision()]; - return (blockBuilder) -> timeType.writeLong(blockBuilder, generateLongDefaults(range, factor, 0, PICOSECONDS_PER_DAY)); + LongRange range = LongRange.of(genericRange, factor, 0, PICOSECONDS_PER_DAY); + return (blockBuilder) -> timeType.writeLong(blockBuilder, numberBetween(range.low, range.high) * factor); } if (type instanceof TimeWithTimeZoneType) { - return timeWithTimeZoneGenerator(range, (TimeWithTimeZoneType) type); + return timeWithTimeZoneGenerator(genericRange, (TimeWithTimeZoneType) type); } if (type instanceof VarbinaryType varType) { - if (!range.isAll()) { + if (!genericRange.isAll()) { throw new TrinoException(INVALID_ROW_FILTER, "Predicates for varbinary columns are not supported"); } return (blockBuilder) -> varType.writeSlice(blockBuilder, sentenceGenerator.get()); } if (type instanceof VarcharType varcharType) { - if (!range.isAll()) { + if (!genericRange.isAll()) { throw new TrinoException(INVALID_ROW_FILTER, "Predicates for varchar columns are not supported"); } if (varcharType.getLength().isPresent()) { @@ -326,18 +336,18 @@ private Generator randomValueGenerator(FakerColumnHandle handle) return (blockBuilder) -> varcharType.writeSlice(blockBuilder, sentenceGenerator.get()); } if (type instanceof CharType charType) { - if (!range.isAll()) { + if (!genericRange.isAll()) { throw new TrinoException(INVALID_ROW_FILTER, "Predicates for char columns are not supported"); } return (blockBuilder) -> charType.writeSlice(blockBuilder, boundedSentenceGenerator.get(charType.getLength())); } // not supported: ROW, ARRAY, MAP, JSON if (type instanceof IpAddressType) { - return generateIpV4(range); + return generateIpV4(genericRange); } // not supported: GEOMETRY if (type instanceof UuidType) { - return generateUUID(range); + return generateUUID(genericRange); } throw new IllegalArgumentException("Unsupported type " + type); @@ -433,116 +443,213 @@ private ObjectWriter objectWriter(Type type) throw new IllegalArgumentException("Unsupported type " + type); } - private long generateLong(Range range, long factor) + private Generator decimalGenerator(Range genericRange, DecimalType decimalType) { - return generateLongDefaults(range, factor, Long.MIN_VALUE, Long.MAX_VALUE); + if (decimalType.isShort()) { + ShortDecimalRange range = ShortDecimalRange.of(genericRange, decimalType.getPrecision()); + return (blockBuilder) -> decimalType.writeLong(blockBuilder, numberBetween(range.low, range.high)); + } + Int128Range range = Int128Range.of(genericRange); + BigInteger currentRange = BigInteger.valueOf(Long.MAX_VALUE); + BigInteger desiredRange = range.high.toBigInteger().subtract(range.low.toBigInteger()); + return (blockBuilder) -> decimalType.writeObject(blockBuilder, Int128.valueOf( + new BigInteger(63, random).multiply(desiredRange).divide(currentRange).add(range.low.toBigInteger()))); } - private long generateLongDefaults(Range range, long factor, long min, long max) + private Generator timestampGenerator(Range genericRange, TimestampType tzType) { - return numberBetween( - roundDiv((long) range.getLowValue().orElse(min), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), - // TODO does the inclusion only apply to positive numbers? - roundDiv((long) range.getHighValue().orElse(max), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)) * factor; + if (tzType.isShort()) { + long factor = POWERS_OF_TEN[6 - tzType.getPrecision()]; + LongRange range = LongRange.of(genericRange, factor); + return (blockBuilder) -> tzType.writeLong(blockBuilder, numberBetween(range.low, range.high) * factor); + } + LongTimestampRange range = LongTimestampRange.of(genericRange, tzType.getPrecision()); + if (tzType.getPrecision() <= 6) { + return (blockBuilder) -> { + long epochMicros = numberBetween(range.low.getEpochMicros(), range.high.getEpochMicros()); + tzType.writeObject(blockBuilder, new LongTimestamp(epochMicros * range.factor, 0)); + }; + } + return (blockBuilder) -> { + long epochMicros = numberBetween(range.low.getEpochMicros(), range.high.getEpochMicros()); + int picosOfMicro; + if (epochMicros == range.low.getEpochMicros()) { + picosOfMicro = numberBetween( + range.low.getPicosOfMicro(), + range.low.getEpochMicros() == range.high.getEpochMicros() ? + range.high.getPicosOfMicro() + : (int) POWERS_OF_TEN[tzType.getPrecision() - 6] - 1); + } + else if (epochMicros == range.high.getEpochMicros()) { + picosOfMicro = numberBetween(0, range.high.getPicosOfMicro()); + } + else { + picosOfMicro = numberBetween(0, (int) POWERS_OF_TEN[tzType.getPrecision() - 6] - 1); + } + tzType.writeObject(blockBuilder, new LongTimestamp(epochMicros, picosOfMicro * range.factor)); + }; } - private int generateInt(Range range) + private Generator timestampWithTimeZoneGenerator(Range genericRange, TimestampWithTimeZoneType tzType) { - return (int) numberBetween( - (long) range.getLowValue().orElse((long) Integer.MIN_VALUE) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), - (long) range.getHighValue().orElse((long) Integer.MAX_VALUE) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); + if (tzType.isShort()) { + ShortTimestampWithTimeZoneRange range = ShortTimestampWithTimeZoneRange.of(genericRange, tzType.getPrecision()); + return (blockBuilder) -> { + long millis = numberBetween(range.low, range.high) * range.factor; + tzType.writeLong(blockBuilder, packDateTimeWithZone(millis, range.defaultTZ)); + }; + } + LongTimestampWithTimeZoneRange range = LongTimestampWithTimeZoneRange.of(genericRange, tzType.getPrecision()); + int picosOfMilliHigh = (int) POWERS_OF_TEN[tzType.getPrecision() - 3] - 1; + return (blockBuilder) -> { + long millis = numberBetween(range.low.getEpochMillis(), range.high.getEpochMillis()); + int picosOfMilli; + if (millis == range.low.getEpochMillis()) { + picosOfMilli = numberBetween( + range.low.getPicosOfMilli(), + range.low.getEpochMillis() == range.high.getEpochMillis() ? + range.high.getPicosOfMilli() + : picosOfMilliHigh); + } + else if (millis == range.high.getEpochMillis()) { + picosOfMilli = numberBetween(0, range.high.getPicosOfMilli()); + } + else { + picosOfMilli = numberBetween(0, picosOfMilliHigh); + } + tzType.writeObject(blockBuilder, fromEpochMillisAndFraction(millis, picosOfMilli * range.factor, range.defaultTZ)); + }; } - private short generateShort(Range range) + private Generator timeWithTimeZoneGenerator(Range genericRange, TimeWithTimeZoneType timeType) { - return (short) numberBetween( - (long) range.getLowValue().orElse((long) Short.MIN_VALUE) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), - (long) range.getHighValue().orElse((long) Short.MAX_VALUE) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); + if (timeType.isShort()) { + ShortTimeWithTimeZoneRange range = ShortTimeWithTimeZoneRange.of(genericRange, timeType.getPrecision()); + return (blockBuilder) -> { + long nanos = numberBetween(range.low, range.high) * range.factor; + timeType.writeLong(blockBuilder, packTimeWithTimeZone(nanos, range.offsetMinutes)); + }; + } + LongTimeWithTimeZoneRange range = LongTimeWithTimeZoneRange.of(genericRange, timeType.getPrecision()); + return (blockBuilder) -> { + long picoseconds = numberBetween(range.low, range.high) * range.factor; + timeType.writeObject(blockBuilder, new LongTimeWithTimeZone(picoseconds, range.offsetMinutes)); + }; } - private byte generateTiny(Range range) + private record LongRange(long low, long high) { - return (byte) numberBetween( - (long) range.getLowValue().orElse((long) Byte.MIN_VALUE) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), - (long) range.getHighValue().orElse((long) Byte.MAX_VALUE) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); - } + static LongRange of(Range range) + { + return of(range, 1, Long.MIN_VALUE, Long.MAX_VALUE); + } - private float generateFloat(Range range) - { - // TODO normalize ranges in applyFilter, so they always have bounds - float minValue = range.getLowValue().map(v -> intBitsToFloat(toIntExact((long) v))).orElse(Float.MIN_VALUE); - if (!range.isLowUnbounded() && !range.isLowInclusive()) { - minValue = Math.nextUp(minValue); + static LongRange of(Range range, long factor) + { + return of(range, factor, Long.MIN_VALUE, Long.MAX_VALUE); } - float maxValue = range.getHighValue().map(v -> intBitsToFloat(toIntExact((long) v))).orElse(Float.MAX_VALUE); - if (!range.isHighUnbounded() && !range.isHighInclusive()) { - maxValue = Math.nextDown(maxValue); + + static LongRange of(Range range, long factor, long defaultMin, long defaultMax) + { + return new LongRange( + roundDiv((long) range.getLowValue().orElse(defaultMin), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), + roundDiv((long) range.getHighValue().orElse(defaultMax), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); } - return minValue + (maxValue - minValue) * random.nextFloat(); } - private double generateDouble(Range range) + private record IntRange(int low, int high) { - double minValue = (double) range.getLowValue().orElse(Double.MIN_VALUE); - if (!range.isLowUnbounded() && !range.isLowInclusive()) { - minValue = Math.nextUp(minValue); + static IntRange of(Range range) + { + return of(range, Integer.MIN_VALUE, Integer.MAX_VALUE); } - double maxValue = (double) range.getHighValue().orElse(Double.MAX_VALUE); - if (!range.isHighUnbounded() && !range.isHighInclusive()) { - maxValue = Math.nextDown(maxValue); + + static IntRange of(Range range, long defaultMin, long defaultMax) + { + return new IntRange( + toIntExact((long) range.getLowValue().orElse(defaultMin)) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), + toIntExact((long) range.getHighValue().orElse(defaultMax)) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); } - return minValue + (maxValue - minValue) * random.nextDouble(); } - private Generator decimalGenerator(Range range, DecimalType decimalType) + private record FloatRange(float low, float high) { - if (decimalType.isShort()) { - long min = -999999999999999999L / POWERS_OF_TEN[18 - decimalType.getPrecision()]; - long max = 999999999999999999L / POWERS_OF_TEN[18 - decimalType.getPrecision()]; - return (blockBuilder) -> decimalType.writeLong(blockBuilder, generateLongDefaults(range, 1, min, max)); - } - Int128 low = (Int128) range.getLowValue().orElse(Decimals.MIN_UNSCALED_DECIMAL); - Int128 high = (Int128) range.getHighValue().orElse(Decimals.MAX_UNSCALED_DECIMAL); - if (!range.isLowUnbounded() && !range.isLowInclusive()) { - long[] result = new long[2]; - Int128Math.add(low.getHigh(), low.getLow(), 0, 1, result, 0); - low = Int128.valueOf(result); - } - if (!range.isHighUnbounded() && range.isHighInclusive()) { - long[] result = new long[2]; - Int128Math.add(high.getHigh(), high.getLow(), 0, 1, result, 0); - high = Int128.valueOf(result); + static FloatRange of(Range range) + { + float low = range.getLowValue().map(v -> intBitsToFloat(toIntExact((long) v))).orElse(Float.MIN_VALUE); + if (!range.isLowUnbounded() && !range.isLowInclusive()) { + low = Math.nextUp(low); + } + float high = range.getHighValue().map(v -> intBitsToFloat(toIntExact((long) v))).orElse(Float.MAX_VALUE); + if (!range.isHighUnbounded() && !range.isHighInclusive()) { + high = Math.nextDown(high); + } + return new FloatRange(low, high); } + } - BigInteger currentRange = BigInteger.valueOf(Long.MAX_VALUE); - BigInteger desiredRange = high.toBigInteger().subtract(low.toBigInteger()); - Int128 finalLow = low; - return (blockBuilder) -> decimalType.writeObject(blockBuilder, Int128.valueOf( - new BigInteger(63, random).multiply(desiredRange).divide(currentRange).add(finalLow.toBigInteger()))); + private record DoubleRange(double low, double high) + { + static DoubleRange of(Range range) + { + double low = (double) range.getLowValue().orElse(Double.MIN_VALUE); + if (!range.isLowUnbounded() && !range.isLowInclusive()) { + low = Math.nextUp(low); + } + double high = (double) range.getHighValue().orElse(Double.MAX_VALUE); + if (!range.isHighUnbounded() && !range.isHighInclusive()) { + high = Math.nextDown(high); + } + return new DoubleRange(low, high); + } } - private Generator timestampGenerator(Range range, TimestampType tzType) + private record ShortDecimalRange(long low, long high) { - if (tzType.isShort()) { - long factor = POWERS_OF_TEN[6 - tzType.getPrecision()]; - return (blockBuilder) -> tzType.writeLong(blockBuilder, generateLong(range, factor)); + static ShortDecimalRange of(Range range, int precision) + { + long defaultMin = -999999999999999999L / POWERS_OF_TEN[18 - precision]; + long defaultMax = 999999999999999999L / POWERS_OF_TEN[18 - precision]; + long low = (long) range.getLowValue().orElse(defaultMin) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); + long high = (long) range.getHighValue().orElse(defaultMax) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); + return new ShortDecimalRange(low, high); } - LongTimestamp low = (LongTimestamp) range.getLowValue() - .orElse(new LongTimestamp(Long.MIN_VALUE, 0)); - LongTimestamp high = (LongTimestamp) range.getHighValue() - .orElse(new LongTimestamp(Long.MAX_VALUE, PICOSECONDS_PER_MICROSECOND - 1)); - int factor; - if (tzType.getPrecision() <= 6) { - factor = (int) POWERS_OF_TEN[6 - tzType.getPrecision()]; - low = new LongTimestamp( - roundDiv(low.getEpochMicros(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), - 0); - high = new LongTimestamp( - roundDiv(high.getEpochMicros(), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0), - 0); + } + + private record Int128Range(Int128 low, Int128 high) + { + static Int128Range of(Range range) + { + Int128 low = (Int128) range.getLowValue().orElse(Decimals.MIN_UNSCALED_DECIMAL); + Int128 high = (Int128) range.getHighValue().orElse(Decimals.MAX_UNSCALED_DECIMAL); + if (!range.isLowUnbounded() && !range.isLowInclusive()) { + long[] result = new long[2]; + Int128Math.add(low.getHigh(), low.getLow(), 0, 1, result, 0); + low = Int128.valueOf(result); + } + if (!range.isHighUnbounded() && range.isHighInclusive()) { + long[] result = new long[2]; + Int128Math.add(high.getHigh(), high.getLow(), 0, 1, result, 0); + high = Int128.valueOf(result); + } + return new Int128Range(low, high); } - else { - factor = (int) POWERS_OF_TEN[12 - tzType.getPrecision()]; + } + + private record LongTimestampRange(LongTimestamp low, LongTimestamp high, int factor) + { + static LongTimestampRange of(Range range, int precision) + { + LongTimestamp low = (LongTimestamp) range.getLowValue().orElse(new LongTimestamp(Long.MIN_VALUE, 0)); + LongTimestamp high = (LongTimestamp) range.getHighValue().orElse(new LongTimestamp(Long.MAX_VALUE, PICOSECONDS_PER_MICROSECOND - 1)); + int factor; + if (precision <= 6) { + factor = (int) POWERS_OF_TEN[6 - precision]; + low = new LongTimestamp(roundDiv(low.getEpochMicros(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), 0); + high = new LongTimestamp(roundDiv(high.getEpochMicros(), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0), 0); + return new LongTimestampRange(low, high, factor); + } + factor = (int) POWERS_OF_TEN[12 - precision]; int lowPicosOfMicro = roundDiv(low.getPicosOfMicro(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); low = new LongTimestamp( low.getEpochMicros() - (lowPicosOfMicro < 0 ? 1 : 0), @@ -551,130 +658,90 @@ private Generator timestampGenerator(Range range, TimestampType tzType) high = new LongTimestamp( high.getEpochMicros() + (highPicosOfMicro > factor ? 1 : 0), highPicosOfMicro % factor); + return new LongTimestampRange(low, high, factor); } - LongTimestamp finalLow = low; - LongTimestamp finalHigh = high; - return (blockBuilder) -> { - long epochMicros = numberBetween(finalLow.getEpochMicros(), finalHigh.getEpochMicros()); - if (tzType.getPrecision() <= 6) { - epochMicros *= factor; - tzType.writeObject(blockBuilder, new LongTimestamp(epochMicros * factor, 0)); - return; - } - int picosOfMicro; - if (epochMicros == finalLow.getEpochMicros()) { - picosOfMicro = numberBetween( - finalLow.getPicosOfMicro(), - finalLow.getEpochMicros() == finalHigh.getEpochMicros() ? - finalHigh.getPicosOfMicro() - : (int) POWERS_OF_TEN[tzType.getPrecision() - 6] - 1); - } - else if (epochMicros == finalHigh.getEpochMicros()) { - picosOfMicro = numberBetween(0, finalHigh.getPicosOfMicro()); - } - else { - picosOfMicro = numberBetween(0, (int) POWERS_OF_TEN[tzType.getPrecision() - 6] - 1); - } - tzType.writeObject(blockBuilder, new LongTimestamp(epochMicros, picosOfMicro * factor)); - }; } - private Generator timestampWithTimeZoneGenerator(Range range, TimestampWithTimeZoneType tzType) + private record ShortTimestampWithTimeZoneRange(long low, long high, long factor, TimeZoneKey defaultTZ) { - if (tzType.isShort()) { + static ShortTimestampWithTimeZoneRange of(Range range, int precision) + { TimeZoneKey defaultTZ = range.getLowValue() .map(v -> unpackZoneKey((long) v)) .orElse(range.getHighValue() .map(v -> unpackZoneKey((long) v)) .orElse(TimeZoneKey.UTC_KEY)); - long factor = POWERS_OF_TEN[3 - tzType.getPrecision()]; - return (blockBuilder) -> { - long millis = numberBetween( - roundDiv(unpackMillisUtc((long) range.getLowValue().orElse(Long.MIN_VALUE)), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), - roundDiv(unpackMillisUtc((long) range.getHighValue().orElse(Long.MAX_VALUE)), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)) * factor; - tzType.writeLong(blockBuilder, packDateTimeWithZone(millis, defaultTZ)); - }; + long factor = POWERS_OF_TEN[3 - precision]; + long low = roundDiv(unpackMillisUtc((long) range.getLowValue().orElse(Long.MIN_VALUE)), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); + long high = roundDiv(unpackMillisUtc((long) range.getHighValue().orElse(Long.MAX_VALUE)), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); + return new ShortTimestampWithTimeZoneRange(low, high, factor, defaultTZ); } - short defaultTZ = range.getLowValue() - .map(v -> ((LongTimestampWithTimeZone) v).getTimeZoneKey()) - .orElse(range.getHighValue() - .map(v -> ((LongTimestampWithTimeZone) v).getTimeZoneKey()) - .orElse(TimeZoneKey.UTC_KEY.getKey())); - LongTimestampWithTimeZone low = (LongTimestampWithTimeZone) range.getLowValue() - .orElse(fromEpochMillisAndFraction(Long.MIN_VALUE >> 12, 0, defaultTZ)); - LongTimestampWithTimeZone high = (LongTimestampWithTimeZone) range.getHighValue() - .orElse(fromEpochMillisAndFraction(Long.MAX_VALUE >> 12, PICOSECONDS_PER_MILLISECOND - 1, defaultTZ)); - if (low.getTimeZoneKey() != high.getTimeZoneKey()) { - throw new TrinoException(INVALID_ROW_FILTER, "Range boundaries for timestamp with time zone columns must have the same time zone"); - } - int factor = (int) POWERS_OF_TEN[12 - tzType.getPrecision()]; - int lowPicosOfMilli = roundDiv(low.getPicosOfMilli(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); - low = fromEpochMillisAndFraction( - low.getEpochMillis() - (lowPicosOfMilli < 0 ? 1 : 0), - (lowPicosOfMilli + factor) % factor, - low.getTimeZoneKey()); - int highPicosOfMilli = roundDiv(high.getPicosOfMilli(), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); - high = fromEpochMillisAndFraction( - high.getEpochMillis() + (highPicosOfMilli > factor ? 1 : 0), - highPicosOfMilli % factor, - high.getTimeZoneKey()); - LongTimestampWithTimeZone finalLow = low; - LongTimestampWithTimeZone finalHigh = high; - return (blockBuilder) -> { - long millis = numberBetween(finalLow.getEpochMillis(), finalHigh.getEpochMillis()); - int picosOfMilli; - if (millis == finalLow.getEpochMillis()) { - picosOfMilli = numberBetween( - finalLow.getPicosOfMilli(), - finalLow.getEpochMillis() == finalHigh.getEpochMillis() ? - finalHigh.getPicosOfMilli() - : (int) POWERS_OF_TEN[tzType.getPrecision() - 3] - 1); - } - else if (millis == finalHigh.getEpochMillis()) { - picosOfMilli = numberBetween(0, finalHigh.getPicosOfMilli()); - } - else { - picosOfMilli = numberBetween(0, (int) POWERS_OF_TEN[tzType.getPrecision() - 3] - 1); + } + + private record LongTimestampWithTimeZoneRange(LongTimestampWithTimeZone low, LongTimestampWithTimeZone high, int factor, short defaultTZ) + { + static LongTimestampWithTimeZoneRange of(Range range, int precision) + { + short defaultTZ = range.getLowValue() + .map(v -> ((LongTimestampWithTimeZone) v).getTimeZoneKey()) + .orElse(range.getHighValue() + .map(v -> ((LongTimestampWithTimeZone) v).getTimeZoneKey()) + .orElse(TimeZoneKey.UTC_KEY.getKey())); + LongTimestampWithTimeZone low = (LongTimestampWithTimeZone) range.getLowValue().orElse(fromEpochMillisAndFraction(Long.MIN_VALUE >> 12, 0, defaultTZ)); + LongTimestampWithTimeZone high = (LongTimestampWithTimeZone) range.getHighValue().orElse(fromEpochMillisAndFraction(Long.MAX_VALUE >> 12, PICOSECONDS_PER_MILLISECOND - 1, defaultTZ)); + if (low.getTimeZoneKey() != high.getTimeZoneKey()) { + throw new TrinoException(INVALID_ROW_FILTER, "Range boundaries for timestamp with time zone columns must have the same time zone"); } - tzType.writeObject(blockBuilder, fromEpochMillisAndFraction(millis, picosOfMilli * factor, defaultTZ)); - }; + int factor = (int) POWERS_OF_TEN[12 - precision]; + int lowPicosOfMilli = roundDiv(low.getPicosOfMilli(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); + low = fromEpochMillisAndFraction( + low.getEpochMillis() - (lowPicosOfMilli < 0 ? 1 : 0), + (lowPicosOfMilli + factor) % factor, + low.getTimeZoneKey()); + int highPicosOfMilli = roundDiv(high.getPicosOfMilli(), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); + high = fromEpochMillisAndFraction( + high.getEpochMillis() + (highPicosOfMilli > factor ? 1 : 0), + highPicosOfMilli % factor, + high.getTimeZoneKey()); + return new LongTimestampWithTimeZoneRange(low, high, factor, defaultTZ); + } } - private Generator timeWithTimeZoneGenerator(Range range, TimeWithTimeZoneType timeType) + private record ShortTimeWithTimeZoneRange(long low, long high, long factor, int offsetMinutes) { - if (timeType.isShort()) { + static ShortTimeWithTimeZoneRange of(Range range, int precision) + { int offsetMinutes = range.getLowValue() .map(v -> unpackOffsetMinutes((long) v)) .orElse(range.getHighValue() .map(v -> unpackOffsetMinutes((long) v)) .orElse(0)); - long factor = POWERS_OF_TEN[9 - timeType.getPrecision()]; + long factor = POWERS_OF_TEN[9 - precision]; long low = roundDiv(range.getLowValue().map(v -> unpackTimeNanos((long) v)).orElse(0L), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); long high = roundDiv(range.getHighValue().map(v -> unpackTimeNanos((long) v)).orElse(NANOSECONDS_PER_DAY), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); - return (blockBuilder) -> { - long nanos = numberBetween(low, high) * factor; - timeType.writeLong(blockBuilder, packTimeWithTimeZone(nanos, offsetMinutes)); - }; + return new ShortTimeWithTimeZoneRange(low, high, factor, offsetMinutes); + } + } + + private record LongTimeWithTimeZoneRange(long low, long high, int factor, int offsetMinutes) + { + static LongTimeWithTimeZoneRange of(Range range, int precision) + { + int offsetMinutes = range.getLowValue() + .map(v -> ((LongTimeWithTimeZone) v).getOffsetMinutes()) + .orElse(range.getHighValue() + .map(v -> ((LongTimeWithTimeZone) v).getOffsetMinutes()) + .orElse(0)); + LongTimeWithTimeZone low = (LongTimeWithTimeZone) range.getLowValue().orElse(new LongTimeWithTimeZone(0, offsetMinutes)); + LongTimeWithTimeZone high = (LongTimeWithTimeZone) range.getHighValue().orElse(new LongTimeWithTimeZone(PICOSECONDS_PER_DAY, offsetMinutes)); + if (low.getOffsetMinutes() != high.getOffsetMinutes()) { + throw new TrinoException(INVALID_ROW_FILTER, "Range boundaries for time with time zone columns must have the same time zone"); + } + int factor = (int) POWERS_OF_TEN[12 - precision]; + long longLow = roundDiv(low.getPicoseconds(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); + long longHigh = roundDiv(high.getPicoseconds(), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); + return new LongTimeWithTimeZoneRange(longLow, longHigh, factor, offsetMinutes); } - int offsetMinutes = range.getLowValue() - .map(v -> ((LongTimeWithTimeZone) v).getOffsetMinutes()) - .orElse(range.getHighValue() - .map(v -> ((LongTimeWithTimeZone) v).getOffsetMinutes()) - .orElse(0)); - LongTimeWithTimeZone low = (LongTimeWithTimeZone) range.getLowValue() - .orElse(new LongTimeWithTimeZone(0, offsetMinutes)); - LongTimeWithTimeZone high = (LongTimeWithTimeZone) range.getHighValue() - .orElse(new LongTimeWithTimeZone(PICOSECONDS_PER_DAY, offsetMinutes)); - if (low.getOffsetMinutes() != high.getOffsetMinutes()) { - throw new TrinoException(INVALID_ROW_FILTER, "Range boundaries for time with time zone columns must have the same time zone"); - } - int factor = (int) POWERS_OF_TEN[12 - timeType.getPrecision()]; - long longLow = roundDiv(low.getPicoseconds(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); - long longHigh = roundDiv(high.getPicoseconds(), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); - return (blockBuilder) -> { - long picoseconds = numberBetween(longLow, longHigh) * factor; - timeType.writeObject(blockBuilder, new LongTimeWithTimeZone(picoseconds, offsetMinutes)); - }; } private int numberBetween(int min, int max) From ea4a044ea76f15ede662515350ca07fbfe77139e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Wa=C5=9B?= Date: Sun, 29 Dec 2024 09:41:30 +0100 Subject: [PATCH 5/8] Fix handling upper bounds in FakerPageSource Fix handling upper bounds for floating point types. The implementation did not account for rounding issue near the bound, and the test was using values outside of the allowed range. --- .../java/io/trino/plugin/faker/FakerPageSource.java | 12 ++++++------ .../java/io/trino/plugin/faker/TestFakerQueries.java | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java index e2b671f35bd8..b15aacb47be4 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java @@ -290,11 +290,11 @@ private Generator randomValueGenerator(FakerColumnHandle handle) } if (REAL.equals(type)) { FloatRange range = FloatRange.of(genericRange); - return (blockBuilder) -> REAL.writeLong(blockBuilder, floatToRawIntBits(range.low + (range.high - range.low) * random.nextFloat())); + return (blockBuilder) -> REAL.writeLong(blockBuilder, floatToRawIntBits(range.low == range.high ? range.low : random.nextFloat(range.low, range.high))); } if (DOUBLE.equals(type)) { DoubleRange range = DoubleRange.of(genericRange); - return (blockBuilder) -> DOUBLE.writeDouble(blockBuilder, range.low + (range.high - range.low) * random.nextDouble()); + return (blockBuilder) -> DOUBLE.writeDouble(blockBuilder, range.low == range.high ? range.low : random.nextDouble(range.low, range.high)); } // not supported: HYPER_LOG_LOG, QDIGEST, TDIGEST, P4_HYPER_LOG_LOG if (INTERVAL_DAY_TIME.equals(type)) { @@ -581,8 +581,8 @@ static FloatRange of(Range range) low = Math.nextUp(low); } float high = range.getHighValue().map(v -> intBitsToFloat(toIntExact((long) v))).orElse(Float.MAX_VALUE); - if (!range.isHighUnbounded() && !range.isHighInclusive()) { - high = Math.nextDown(high); + if (!range.isHighUnbounded() && range.isHighInclusive()) { + high = Math.nextUp(high); } return new FloatRange(low, high); } @@ -597,8 +597,8 @@ static DoubleRange of(Range range) low = Math.nextUp(low); } double high = (double) range.getHighValue().orElse(Double.MAX_VALUE); - if (!range.isHighUnbounded() && !range.isHighInclusive()) { - high = Math.nextDown(high); + if (!range.isHighUnbounded() && range.isHighInclusive()) { + high = Math.nextUp(high); } return new DoubleRange(low, high); } diff --git a/plugin/trino-faker/src/test/java/io/trino/plugin/faker/TestFakerQueries.java b/plugin/trino-faker/src/test/java/io/trino/plugin/faker/TestFakerQueries.java index bf48ea914bd1..503cfbed8319 100644 --- a/plugin/trino-faker/src/test/java/io/trino/plugin/faker/TestFakerQueries.java +++ b/plugin/trino-faker/src/test/java/io/trino/plugin/faker/TestFakerQueries.java @@ -311,8 +311,8 @@ void testSelectRangeProperties() .add(new TestDataType("rnd_decimal3", "decimal(38,0)", Map.of("min", "99999999999999999999999999999999999999"), "count(distinct rnd_decimal3)", "1")) .add(new TestDataType("rnd_decimal4", "decimal(38,38)", Map.of("min", "0.99999999999999999999999999999999999999"), "count(distinct rnd_decimal4)", "1")) .add(new TestDataType("rnd_decimal5", "decimal(5,2)", Map.of("min", "999.99"), "count(distinct rnd_decimal5)", "1")) - .add(new TestDataType("rnd_real", "real", Map.of("min", "1.4E45"), "count(distinct rnd_real)", "1")) - .add(new TestDataType("rnd_double", "double", Map.of("min", "4.9E324"), "count(distinct rnd_double)", "1")) + .add(new TestDataType("rnd_real", "real", Map.of("min", "3.4028235E38"), "count(distinct rnd_real)", "1")) + .add(new TestDataType("rnd_double", "double", Map.of("min", "1.7976931348623157E308"), "count(distinct rnd_double)", "1")) // interval literals can't represent smallest possible values allowed by the engine, so they're not included here // can't test timestamps because their extreme values cannot be expressed as literals .add(new TestDataType("rnd_time", "time", Map.of("min", "23:59:59.999"), "count(distinct rnd_time)", "1")) From da9c89900fc5d6eeae4495cbfb00b7c590c16af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Wa=C5=9B?= Date: Sun, 29 Dec 2024 09:45:42 +0100 Subject: [PATCH 6/8] Refactor FakerPageSource Refactor to make subsequent commit's diff smaller --- .../trino/plugin/faker/FakerPageSource.java | 59 +++++++++---------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java index b15aacb47be4..32ed38b935e6 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java @@ -126,7 +126,7 @@ class FakerPageSource Faker faker, Random random, List columns, - long offset, + long rowOffset, long limit) { this.faker = requireNonNull(faker, "faker is null"); @@ -141,19 +141,19 @@ class FakerPageSource this.generators = columns .stream() - .map(column -> getGenerator(column, offset)) + .map(column -> getGenerator(column, rowOffset)) .collect(toImmutableList()); this.pageBuilder = new PageBuilder(types); } private Generator getGenerator( FakerColumnHandle column, - long offset) + long rowOffset) { if (ROW_ID_COLUMN_NAME.equals(column.name())) { return new Generator() { - long currentRowId = offset; + long currentRowId = rowOffset; @Override public void accept(BlockBuilder blockBuilder) @@ -163,7 +163,23 @@ public void accept(BlockBuilder blockBuilder) }; } - return constraintedValueGenerator(column); + if (column.domain().getValues().isDiscreteSet()) { + List values = column.domain().getValues().getDiscreteSet(); + ObjectWriter singleValueWriter = objectWriter(column.type()); + return (blockBuilder) -> singleValueWriter.accept(blockBuilder, values.get(random.nextInt(values.size()))); + } + Generator generator = randomValueGenerator(column); + if (column.nullProbability() == 0) { + return generator; + } + return (blockBuilder) -> { + if (random.nextDouble() <= column.nullProbability()) { + blockBuilder.appendNull(); + } + else { + generator.accept(blockBuilder); + } + }; } @Override @@ -227,27 +243,6 @@ public void close() closed = true; } - private Generator constraintedValueGenerator(FakerColumnHandle handle) - { - if (handle.domain().getValues().isDiscreteSet()) { - List values = handle.domain().getValues().getDiscreteSet(); - ObjectWriter singleValueWriter = objectWriter(handle.type()); - return (blockBuilder) -> singleValueWriter.accept(blockBuilder, values.get(random.nextInt(values.size()))); - } - Generator generator = randomValueGenerator(handle); - if (handle.nullProbability() == 0) { - return generator; - } - return (blockBuilder) -> { - if (random.nextDouble() <= handle.nullProbability()) { - blockBuilder.appendNull(); - } - else { - generator.accept(blockBuilder); - } - }; - } - private Generator randomValueGenerator(FakerColumnHandle handle) { Range genericRange = handle.domain().getValues().getRanges().getSpan(); @@ -305,19 +300,19 @@ private Generator randomValueGenerator(FakerColumnHandle handle) IntRange range = IntRange.of(genericRange); return (blockBuilder) -> INTERVAL_YEAR_MONTH.writeLong(blockBuilder, numberBetween(range.low, range.high)); } - if (type instanceof TimestampType) { - return timestampGenerator(genericRange, (TimestampType) type); + if (type instanceof TimestampType timestampType) { + return timestampGenerator(genericRange, timestampType); } - if (type instanceof TimestampWithTimeZoneType) { - return timestampWithTimeZoneGenerator(genericRange, (TimestampWithTimeZoneType) type); + if (type instanceof TimestampWithTimeZoneType timestampWithTimeZoneType) { + return timestampWithTimeZoneGenerator(genericRange, timestampWithTimeZoneType); } if (type instanceof TimeType timeType) { long factor = POWERS_OF_TEN[12 - timeType.getPrecision()]; LongRange range = LongRange.of(genericRange, factor, 0, PICOSECONDS_PER_DAY); return (blockBuilder) -> timeType.writeLong(blockBuilder, numberBetween(range.low, range.high) * factor); } - if (type instanceof TimeWithTimeZoneType) { - return timeWithTimeZoneGenerator(genericRange, (TimeWithTimeZoneType) type); + if (type instanceof TimeWithTimeZoneType timeWithTimeZoneType) { + return timeWithTimeZoneGenerator(genericRange, timeWithTimeZoneType); } if (type instanceof VarbinaryType varType) { if (!genericRange.isAll()) { From 6145b0610351dc462dc0eaed74780edbb01bc22a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Wa=C5=9B?= Date: Sun, 29 Dec 2024 09:57:03 +0100 Subject: [PATCH 7/8] Extract a method in FakerColumnHandle --- .../trino/plugin/faker/FakerColumnHandle.java | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java index 96d870eafb8e..c1895eb2d2e5 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java @@ -51,6 +51,7 @@ public record FakerColumnHandle( { requireNonNull(name, "name is null"); requireNonNull(type, "type is null"); + requireNonNull(domain, "domain is null"); } public static FakerColumnHandle of(int columnId, ColumnMetadata column, double defaultNullProbability) @@ -63,20 +64,8 @@ public static FakerColumnHandle of(int columnId, ColumnMetadata column, double d if (generator != null && !isCharacterColumn(column)) { throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property can only be set for CHAR, VARCHAR or VARBINARY columns".formatted(GENERATOR_PROPERTY)); } - Object min; - try { - min = Literal.parse((String) column.getProperties().get(MIN_PROPERTY), column.getType()); - } - catch (IllegalArgumentException e) { - throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property must be a valid %s literal".formatted(MIN_PROPERTY, column.getType().getDisplayName()), e); - } - Object max; - try { - max = Literal.parse((String) column.getProperties().get(MAX_PROPERTY), column.getType()); - } - catch (IllegalArgumentException e) { - throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property must be a valid %s literal".formatted(MAX_PROPERTY, column.getType().getDisplayName()), e); - } + Object min = propertyValue(column, MIN_PROPERTY); + Object max = propertyValue(column, MAX_PROPERTY); Domain domain = Domain.all(column.getType()); if (min != null || max != null) { if (isCharacterColumn(column)) { @@ -114,6 +103,16 @@ private static boolean isCharacterColumn(ColumnMetadata column) return column.getType() instanceof CharType || column.getType() instanceof VarcharType || column.getType() instanceof VarbinaryType; } + private static Object propertyValue(ColumnMetadata column, String property) + { + try { + return Literal.parse((String) column.getProperties().get(property), column.getType()); + } + catch (IllegalArgumentException e) { + throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property must be a valid %s literal".formatted(property, column.getType().getDisplayName()), e); + } + } + private static Range range(Type type, Object min, Object max) { requireNonNull(type, "type is null"); From a6137e3de0f26efd956806693c056fc8fab7d837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Wa=C5=9B?= Date: Sat, 28 Dec 2024 23:04:32 +0100 Subject: [PATCH 8/8] Support generating sequences in the Faker connector --- docs/src/main/sphinx/connector/faker.md | 3 + plugin/trino-faker/pom.xml | 11 +- .../io/trino/plugin/faker/ColumnInfo.java | 1 + .../trino/plugin/faker/FakerColumnHandle.java | 44 ++- .../io/trino/plugin/faker/FakerConnector.java | 8 +- .../io/trino/plugin/faker/FakerMetadata.java | 5 +- .../trino/plugin/faker/FakerPageSource.java | 339 +++++++++++++++--- .../trino/plugin/faker/TestFakerQueries.java | 45 +++ 8 files changed, 399 insertions(+), 57 deletions(-) diff --git a/docs/src/main/sphinx/connector/faker.md b/docs/src/main/sphinx/connector/faker.md index d5fb56791659..21260339de23 100644 --- a/docs/src/main/sphinx/connector/faker.md +++ b/docs/src/main/sphinx/connector/faker.md @@ -111,6 +111,9 @@ The following table details all supported column properties. * - `allowed_values` - List of allowed values. Cannot be set together with the `min`, or `max` properties. +* - `step` + - If set, generate sequential values with this step. For date and time columns + set this to a duration. Cannot be set for character-based type columns. ::: ### Character types diff --git a/plugin/trino-faker/pom.xml b/plugin/trino-faker/pom.xml index 8a87b91f122e..c0866b9f3445 100644 --- a/plugin/trino-faker/pom.xml +++ b/plugin/trino-faker/pom.xml @@ -40,6 +40,11 @@ configuration + + io.airlift + units + + io.trino trino-main @@ -119,12 +124,6 @@ runtime - - io.airlift - units - runtime - - io.trino trino-client diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/ColumnInfo.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/ColumnInfo.java index bd52ae4fa776..9bd28af238b4 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/ColumnInfo.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/ColumnInfo.java @@ -27,6 +27,7 @@ public record ColumnInfo(FakerColumnHandle handle, ColumnMetadata metadata) public static final String MIN_PROPERTY = "min"; public static final String MAX_PROPERTY = "max"; public static final String ALLOWED_VALUES_PROPERTY = "allowed_values"; + public static final String STEP_PROPERTY = "step"; public ColumnInfo { diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java index c1895eb2d2e5..c7af196596ca 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerColumnHandle.java @@ -15,6 +15,7 @@ package io.trino.plugin.faker; import com.google.common.collect.ImmutableList; +import io.airlift.units.Duration; import io.trino.spi.TrinoException; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.connector.ColumnMetadata; @@ -22,20 +23,29 @@ import io.trino.spi.predicate.Range; import io.trino.spi.predicate.ValueSet; import io.trino.spi.type.CharType; +import io.trino.spi.type.TimeType; +import io.trino.spi.type.TimeWithTimeZoneType; +import io.trino.spi.type.TimestampType; +import io.trino.spi.type.TimestampWithTimeZoneType; import io.trino.spi.type.Type; import io.trino.spi.type.VarbinaryType; import io.trino.spi.type.VarcharType; import java.util.Collection; import java.util.List; +import java.util.concurrent.TimeUnit; +import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.plugin.faker.ColumnInfo.ALLOWED_VALUES_PROPERTY; import static io.trino.plugin.faker.ColumnInfo.GENERATOR_PROPERTY; import static io.trino.plugin.faker.ColumnInfo.MAX_PROPERTY; import static io.trino.plugin.faker.ColumnInfo.MIN_PROPERTY; import static io.trino.plugin.faker.ColumnInfo.NULL_PROBABILITY_PROPERTY; +import static io.trino.plugin.faker.ColumnInfo.STEP_PROPERTY; import static io.trino.spi.StandardErrorCode.INVALID_COLUMN_PROPERTY; +import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.DateType.DATE; import static java.util.Objects.requireNonNull; public record FakerColumnHandle( @@ -44,7 +54,8 @@ public record FakerColumnHandle( Type type, double nullProbability, String generator, - Domain domain) + Domain domain, + ValueSet step) implements ColumnHandle { public FakerColumnHandle @@ -52,6 +63,8 @@ public record FakerColumnHandle( requireNonNull(name, "name is null"); requireNonNull(type, "type is null"); requireNonNull(domain, "domain is null"); + requireNonNull(step, "step is null"); + checkState(step.isNone() || step.isSingleValue(), "step must be a single value"); } public static FakerColumnHandle of(int columnId, ColumnMetadata column, double defaultNullProbability) @@ -95,7 +108,8 @@ public static FakerColumnHandle of(int columnId, ColumnMetadata column, double d column.getType(), nullProbability, generator, - domain); + domain, + stepValue(column)); } private static boolean isCharacterColumn(ColumnMetadata column) @@ -113,6 +127,32 @@ private static Object propertyValue(ColumnMetadata column, String property) } } + private static ValueSet stepValue(ColumnMetadata column) + { + Type type = column.getType(); + String rawStep = (String) column.getProperties().get(STEP_PROPERTY); + if (rawStep == null) { + return ValueSet.none(type); + } + if (isCharacterColumn(column)) { + throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property cannot be set for CHAR, VARCHAR or VARBINARY columns".formatted(STEP_PROPERTY)); + } + if (DATE.equals(column.getType()) || type instanceof TimestampType || type instanceof TimestampWithTimeZoneType || type instanceof TimeType || type instanceof TimeWithTimeZoneType) { + try { + return ValueSet.of(BIGINT, Duration.valueOf(rawStep).roundTo(TimeUnit.NANOSECONDS)); + } + catch (IllegalArgumentException e) { + throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property for a %s column must be a valid duration literal".formatted(STEP_PROPERTY, column.getType().getDisplayName()), e); + } + } + try { + return ValueSet.of(type, Literal.parse(rawStep, type)); + } + catch (IllegalArgumentException e) { + throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property for a %s column must be a valid %s literal".formatted(STEP_PROPERTY, column.getType().getDisplayName(), type.getDisplayName()), e); + } + } + private static Range range(Type type, Object min, Object max) { requireNonNull(type, "type is null"); diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerConnector.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerConnector.java index 2a5819c04b47..734fa513b355 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerConnector.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerConnector.java @@ -39,6 +39,7 @@ import static io.trino.plugin.faker.ColumnInfo.ALLOWED_VALUES_PROPERTY; import static io.trino.plugin.faker.ColumnInfo.MAX_PROPERTY; import static io.trino.plugin.faker.ColumnInfo.MIN_PROPERTY; +import static io.trino.plugin.faker.ColumnInfo.STEP_PROPERTY; import static io.trino.spi.StandardErrorCode.INVALID_COLUMN_PROPERTY; import static io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY; import static io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY; @@ -188,7 +189,12 @@ public List> getColumnProperties() value -> ((List) value).stream() .map(String.class::cast) .collect(toImmutableList()), - value -> value)); + value -> value), + stringProperty( + STEP_PROPERTY, + "If set, generate sequential values with this step. For date and time columns set this to a duration", + null, + false)); } private static void checkProperty(boolean expression, ErrorCodeSupplier errorCode, String errorMessage) diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerMetadata.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerMetadata.java index 275d04d62004..46d246e9d8d0 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerMetadata.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerMetadata.java @@ -43,6 +43,8 @@ import io.trino.spi.function.FunctionMetadata; import io.trino.spi.function.SchemaFunctionName; import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.Range; +import io.trino.spi.predicate.ValueSet; import io.trino.spi.security.TrinoPrincipal; import io.trino.spi.statistics.ComputedStatistics; import io.trino.spi.type.BigintType; @@ -336,7 +338,8 @@ public synchronized FakerOutputTableHandle beginCreateTable(ConnectorSession ses BigintType.BIGINT, 0, "", - Domain.all(BigintType.BIGINT)), + Domain.create(ValueSet.ofRanges(Range.greaterThanOrEqual(BigintType.BIGINT, 0L)), false), + ValueSet.of(BigintType.BIGINT, 1L)), ColumnMetadata.builder() .setName(ROW_ID_COLUMN_NAME) .setType(BigintType.BIGINT) diff --git a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java index 32ed38b935e6..e1df2900bccb 100644 --- a/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java +++ b/plugin/trino-faker/src/main/java/io/trino/plugin/faker/FakerPageSource.java @@ -50,7 +50,6 @@ import java.util.Random; import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.trino.plugin.faker.FakerMetadata.ROW_ID_COLUMN_NAME; import static io.trino.spi.StandardErrorCode.INVALID_ROW_FILTER; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; @@ -67,9 +66,12 @@ import static io.trino.spi.type.RealType.REAL; import static io.trino.spi.type.SmallintType.SMALLINT; import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_DAY; +import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MICROSECOND; +import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MILLISECOND; import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_DAY; import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MICROSECOND; import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MILLISECOND; +import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_NANOSECOND; import static io.trino.spi.type.Timestamps.roundDiv; import static io.trino.spi.type.TinyintType.TINYINT; import static io.trino.spi.type.UuidType.UUID; @@ -150,25 +152,18 @@ private Generator getGenerator( FakerColumnHandle column, long rowOffset) { - if (ROW_ID_COLUMN_NAME.equals(column.name())) { - return new Generator() - { - long currentRowId = rowOffset; - - @Override - public void accept(BlockBuilder blockBuilder) - { - BIGINT.writeLong(blockBuilder, currentRowId++); - } - }; - } - if (column.domain().getValues().isDiscreteSet()) { List values = column.domain().getValues().getDiscreteSet(); ObjectWriter singleValueWriter = objectWriter(column.type()); return (blockBuilder) -> singleValueWriter.accept(blockBuilder, values.get(random.nextInt(values.size()))); } - Generator generator = randomValueGenerator(column); + Generator generator; + if (!column.step().isNone()) { + generator = sequenceGenerator(column, rowOffset); + } + else { + generator = randomValueGenerator(column); + } if (column.nullProbability() == 0) { return generator; } @@ -243,6 +238,103 @@ public void close() closed = true; } + private Generator sequenceGenerator(FakerColumnHandle handle, long rowOffset) + { + SequenceWriter writer = sequenceWriter(handle); + + return new Generator() + { + long currentRowId = rowOffset; + + @Override + public void accept(BlockBuilder blockBuilder) + { + writer.accept(blockBuilder, currentRowId++); + } + }; + } + + private SequenceWriter sequenceWriter(FakerColumnHandle handle) + { + Range genericRange = handle.domain().getValues().getRanges().getSpan(); + Type type = handle.type(); + // check every type in order defined in StandardTypes + // not supported: BOOLEAN, HYPER_LOG_LOG, QDIGEST, TDIGEST, P4_HYPER_LOG_LOG, VARBINARY, VARCHAR, CHAR, ROW, ARRAY, MAP, JSON, IPADDRESS, GEOMETRY, UUID + if (BIGINT.equals(type)) { + LongRange range = LongRange.of(genericRange, 1, (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> BIGINT.writeLong(blockBuilder, range.at(rowId)); + } + if (INTEGER.equals(type)) { + IntRange range = IntRange.of(genericRange, (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> INTEGER.writeLong(blockBuilder, range.at(rowId)); + } + if (SMALLINT.equals(type)) { + IntRange range = IntRange.of(genericRange, Short.MIN_VALUE, Short.MAX_VALUE, (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> SMALLINT.writeLong(blockBuilder, range.at(rowId)); + } + if (TINYINT.equals(type)) { + IntRange range = IntRange.of(genericRange, Byte.MIN_VALUE, Byte.MAX_VALUE, (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> TINYINT.writeLong(blockBuilder, range.at(rowId)); + } + if (DATE.equals(type)) { + IntRange range = IntRange.of(genericRange, (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> DATE.writeLong(blockBuilder, range.at(rowId, NANOSECONDS_PER_DAY)); + } + if (type instanceof DecimalType decimalType) { + if (decimalType.isShort()) { + ShortDecimalRange range = ShortDecimalRange.of(genericRange, decimalType.getPrecision(), (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> decimalType.writeLong(blockBuilder, range.at(rowId)); + } + Int128Range range = Int128Range.of(genericRange, (Int128) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> decimalType.writeObject(blockBuilder, range.at(rowId)); + } + if (REAL.equals(type)) { + FloatRange range = FloatRange.of(genericRange, intBitsToFloat(toIntExact((long) handle.step().getSingleValue()))); + return (blockBuilder, rowId) -> REAL.writeLong(blockBuilder, floatToRawIntBits(range.at(rowId))); + } + if (DOUBLE.equals(type)) { + DoubleRange range = DoubleRange.of(genericRange, (double) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> DOUBLE.writeDouble(blockBuilder, range.at(rowId)); + } + if (INTERVAL_DAY_TIME.equals(type) || INTERVAL_YEAR_MONTH.equals(type)) { + // step is seconds or months + IntRange range = IntRange.of(genericRange, (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> type.writeLong(blockBuilder, range.at(rowId)); + } + if (type instanceof TimestampType timestampType) { + if (timestampType.isShort()) { + long factor = POWERS_OF_TEN[6 - timestampType.getPrecision()]; + LongRange range = LongRange.of(genericRange, factor, (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> timestampType.writeLong(blockBuilder, range.at(rowId, factor * NANOSECONDS_PER_MICROSECOND) * factor); + } + LongTimestampRange range = LongTimestampRange.of(genericRange, timestampType.getPrecision(), (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> timestampType.writeObject(blockBuilder, range.at(rowId, NANOSECONDS_PER_MICROSECOND)); + } + if (type instanceof TimestampWithTimeZoneType tzType) { + if (tzType.isShort()) { + ShortTimestampWithTimeZoneRange range = ShortTimestampWithTimeZoneRange.of(genericRange, tzType.getPrecision(), (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> tzType.writeLong(blockBuilder, range.at(rowId, NANOSECONDS_PER_MILLISECOND)); + } + LongTimestampWithTimeZoneRange range = LongTimestampWithTimeZoneRange.of(genericRange, tzType.getPrecision(), (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> tzType.writeObject(blockBuilder, range.at(rowId, NANOSECONDS_PER_MILLISECOND)); + } + if (type instanceof TimeType timeType) { + long factor = POWERS_OF_TEN[12 - timeType.getPrecision()]; + LongRange range = LongRange.of(genericRange, factor, 0, PICOSECONDS_PER_DAY, (long) handle.step().getSingleValue() * PICOSECONDS_PER_NANOSECOND); + return (blockBuilder, rowId) -> timeType.writeLong(blockBuilder, range.at(rowId, factor) * factor); + } + if (type instanceof TimeWithTimeZoneType timeType) { + if (timeType.isShort()) { + ShortTimeWithTimeZoneRange range = ShortTimeWithTimeZoneRange.of(genericRange, timeType.getPrecision(), (long) handle.step().getSingleValue()); + return (blockBuilder, rowId) -> timeType.writeLong(blockBuilder, range.at(rowId)); + } + LongTimeWithTimeZoneRange range = LongTimeWithTimeZoneRange.of(genericRange, timeType.getPrecision(), (long) handle.step().getSingleValue() * PICOSECONDS_PER_NANOSECOND); + return (blockBuilder, rowId) -> timeType.writeObject(blockBuilder, range.at(rowId)); + } + + throw new IllegalArgumentException("Unsupported type " + type); + } + private Generator randomValueGenerator(FakerColumnHandle handle) { Range genericRange = handle.domain().getValues().getRanges().getSpan(); @@ -532,44 +624,91 @@ private Generator timeWithTimeZoneGenerator(Range genericRange, TimeWithTimeZone }; } - private record LongRange(long low, long high) + private record LongRange(long low, long high, long step) { static LongRange of(Range range) { - return of(range, 1, Long.MIN_VALUE, Long.MAX_VALUE); + return of(range, 1, Long.MIN_VALUE, Long.MAX_VALUE, 1); } static LongRange of(Range range, long factor) { - return of(range, factor, Long.MIN_VALUE, Long.MAX_VALUE); + return of(range, factor, Long.MIN_VALUE, Long.MAX_VALUE, 1); + } + + static LongRange of(Range range, long factor, long step) + { + return of(range, factor, Long.MIN_VALUE, Long.MAX_VALUE, step); } static LongRange of(Range range, long factor, long defaultMin, long defaultMax) + { + return of(range, factor, defaultMin, defaultMax, 1); + } + + static LongRange of(Range range, long factor, long defaultMin, long defaultMax, long step) { return new LongRange( roundDiv((long) range.getLowValue().orElse(defaultMin), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), - roundDiv((long) range.getHighValue().orElse(defaultMax), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); + roundDiv((long) range.getHighValue().orElse(defaultMax), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0), + step); + } + + long at(long index) + { + return Math.min(low + index * step, high - 1); + } + + long at(long index, long factor) + { + return Math.min(low + roundDiv(index * step, factor), high - 1); } } - private record IntRange(int low, int high) + private record IntRange(int low, int high, long step) { static IntRange of(Range range) { - return of(range, Integer.MIN_VALUE, Integer.MAX_VALUE); + return of(range, Integer.MIN_VALUE, Integer.MAX_VALUE, 1); + } + + static IntRange of(Range range, long step) + { + return of(range, Integer.MIN_VALUE, Integer.MAX_VALUE, step); } static IntRange of(Range range, long defaultMin, long defaultMax) + { + return of(range, defaultMin, defaultMax, 1); + } + + static IntRange of(Range range, long defaultMin, long defaultMax, long step) { return new IntRange( toIntExact((long) range.getLowValue().orElse(defaultMin)) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), - toIntExact((long) range.getHighValue().orElse(defaultMax)) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0)); + toIntExact((long) range.getHighValue().orElse(defaultMax)) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0), + step); + } + + long at(long index) + { + return Math.min(low + index * step, high - 1); + } + + long at(long index, long factor) + { + return Math.min(low + roundDiv(index * step, factor), high - 1); } } - private record FloatRange(float low, float high) + private record FloatRange(float low, float high, float step) { static FloatRange of(Range range) + { + return of(range, 1); + } + + static FloatRange of(Range range, float step) { float low = range.getLowValue().map(v -> intBitsToFloat(toIntExact((long) v))).orElse(Float.MIN_VALUE); if (!range.isLowUnbounded() && !range.isLowInclusive()) { @@ -579,13 +718,23 @@ static FloatRange of(Range range) if (!range.isHighUnbounded() && range.isHighInclusive()) { high = Math.nextUp(high); } - return new FloatRange(low, high); + return new FloatRange(low, high, step); + } + + float at(long index) + { + return Math.min(low + index * step, Math.nextDown(high)); } } - private record DoubleRange(double low, double high) + private record DoubleRange(double low, double high, double step) { static DoubleRange of(Range range) + { + return of(range, 1); + } + + static DoubleRange of(Range range, double step) { double low = (double) range.getLowValue().orElse(Double.MIN_VALUE); if (!range.isLowUnbounded() && !range.isLowInclusive()) { @@ -595,45 +744,80 @@ static DoubleRange of(Range range) if (!range.isHighUnbounded() && range.isHighInclusive()) { high = Math.nextUp(high); } - return new DoubleRange(low, high); + return new DoubleRange(low, high, step); + } + + double at(long index) + { + return Math.min(low + index * step, Math.nextDown(high)); } } - private record ShortDecimalRange(long low, long high) + private record ShortDecimalRange(long low, long high, long step) { static ShortDecimalRange of(Range range, int precision) + { + return of(range, precision, 1); + } + + static ShortDecimalRange of(Range range, int precision, long step) { long defaultMin = -999999999999999999L / POWERS_OF_TEN[18 - precision]; long defaultMax = 999999999999999999L / POWERS_OF_TEN[18 - precision]; long low = (long) range.getLowValue().orElse(defaultMin) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); long high = (long) range.getHighValue().orElse(defaultMax) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); - return new ShortDecimalRange(low, high); + return new ShortDecimalRange(low, high, step); + } + + long at(long index) + { + return Math.min(low + index * step, high - 1); } } - private record Int128Range(Int128 low, Int128 high) + private record Int128Range(Int128 low, Int128 high, Int128 step) { static Int128Range of(Range range) + { + return of(range, Int128.ONE); + } + + static Int128Range of(Range range, Int128 step) { Int128 low = (Int128) range.getLowValue().orElse(Decimals.MIN_UNSCALED_DECIMAL); Int128 high = (Int128) range.getHighValue().orElse(Decimals.MAX_UNSCALED_DECIMAL); if (!range.isLowUnbounded() && !range.isLowInclusive()) { - long[] result = new long[2]; - Int128Math.add(low.getHigh(), low.getLow(), 0, 1, result, 0); - low = Int128.valueOf(result); + low = add(low, Int128.ONE); } if (!range.isHighUnbounded() && range.isHighInclusive()) { - long[] result = new long[2]; - Int128Math.add(high.getHigh(), high.getLow(), 0, 1, result, 0); - high = Int128.valueOf(result); + high = add(high, Int128.ONE); } - return new Int128Range(low, high); + return new Int128Range(low, high, step); + } + + Int128 at(long index) + { + Int128 nextValue = add(low, Int128Math.multiply(Int128.valueOf(index), step)); + Int128 highInclusive = Int128Math.subtract(high, Int128.ONE); + return highInclusive.compareTo(nextValue) < 0 ? highInclusive : nextValue; } } - private record LongTimestampRange(LongTimestamp low, LongTimestamp high, int factor) + private static Int128 add(Int128 left, Int128 right) + { + long[] result = new long[2]; + Int128Math.add(left.getHigh(), left.getLow(), right.getHigh(), right.getLow(), result, 0); + return Int128.valueOf(result); + } + + private record LongTimestampRange(LongTimestamp low, LongTimestamp high, int factor, long step) { static LongTimestampRange of(Range range, int precision) + { + return of(range, precision, 1); + } + + static LongTimestampRange of(Range range, int precision, long step) { LongTimestamp low = (LongTimestamp) range.getLowValue().orElse(new LongTimestamp(Long.MIN_VALUE, 0)); LongTimestamp high = (LongTimestamp) range.getHighValue().orElse(new LongTimestamp(Long.MAX_VALUE, PICOSECONDS_PER_MICROSECOND - 1)); @@ -642,7 +826,7 @@ static LongTimestampRange of(Range range, int precision) factor = (int) POWERS_OF_TEN[6 - precision]; low = new LongTimestamp(roundDiv(low.getEpochMicros(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0), 0); high = new LongTimestamp(roundDiv(high.getEpochMicros(), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0), 0); - return new LongTimestampRange(low, high, factor); + return new LongTimestampRange(low, high, factor, step); } factor = (int) POWERS_OF_TEN[12 - precision]; int lowPicosOfMicro = roundDiv(low.getPicosOfMicro(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); @@ -653,13 +837,26 @@ static LongTimestampRange of(Range range, int precision) high = new LongTimestamp( high.getEpochMicros() + (highPicosOfMicro > factor ? 1 : 0), highPicosOfMicro % factor); - return new LongTimestampRange(low, high, factor); + return new LongTimestampRange(low, high, factor, step); + } + + LongTimestamp at(long index, long stepFactor) + { + // TODO support nanosecond increments + // TODO handle exclusive high + long epochMicros = low.getEpochMicros() + roundDiv(index * step, stepFactor); + return new LongTimestamp(step > 0 ? Math.min(epochMicros, high.getEpochMicros()) : Math.max(epochMicros, high.getEpochMicros()), 0); } } - private record ShortTimestampWithTimeZoneRange(long low, long high, long factor, TimeZoneKey defaultTZ) + private record ShortTimestampWithTimeZoneRange(long low, long high, long factor, TimeZoneKey defaultTZ, long step) { static ShortTimestampWithTimeZoneRange of(Range range, int precision) + { + return of(range, precision, 1); + } + + static ShortTimestampWithTimeZoneRange of(Range range, int precision, long step) { TimeZoneKey defaultTZ = range.getLowValue() .map(v -> unpackZoneKey((long) v)) @@ -669,13 +866,25 @@ static ShortTimestampWithTimeZoneRange of(Range range, int precision) long factor = POWERS_OF_TEN[3 - precision]; long low = roundDiv(unpackMillisUtc((long) range.getLowValue().orElse(Long.MIN_VALUE)), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); long high = roundDiv(unpackMillisUtc((long) range.getHighValue().orElse(Long.MAX_VALUE)), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); - return new ShortTimestampWithTimeZoneRange(low, high, factor, defaultTZ); + return new ShortTimestampWithTimeZoneRange(low, high, factor, defaultTZ, step); + } + + long at(long index, long stepFactor) + { + // TODO support nanosecond increments + long millis = low + roundDiv(index * step, factor * stepFactor); + return packDateTimeWithZone((step > 0 ? Math.min(millis, high - 1) : Math.max(millis, high - 1)) * factor, defaultTZ); } } - private record LongTimestampWithTimeZoneRange(LongTimestampWithTimeZone low, LongTimestampWithTimeZone high, int factor, short defaultTZ) + private record LongTimestampWithTimeZoneRange(LongTimestampWithTimeZone low, LongTimestampWithTimeZone high, int factor, short defaultTZ, long step) { static LongTimestampWithTimeZoneRange of(Range range, int precision) + { + return of(range, precision, 1); + } + + static LongTimestampWithTimeZoneRange of(Range range, int precision, long step) { short defaultTZ = range.getLowValue() .map(v -> ((LongTimestampWithTimeZone) v).getTimeZoneKey()) @@ -698,13 +907,26 @@ static LongTimestampWithTimeZoneRange of(Range range, int precision) high.getEpochMillis() + (highPicosOfMilli > factor ? 1 : 0), highPicosOfMilli % factor, high.getTimeZoneKey()); - return new LongTimestampWithTimeZoneRange(low, high, factor, defaultTZ); + return new LongTimestampWithTimeZoneRange(low, high, factor, defaultTZ, step); + } + + LongTimestampWithTimeZone at(long index, long stepFactor) + { + // TODO support nanosecond increments + // TODO handle exclusive high + long millis = low.getEpochMillis() + roundDiv(index * step, stepFactor); + return fromEpochMillisAndFraction(step > 0 ? Math.min(millis, high.getEpochMillis()) : Math.max(millis, high.getEpochMillis()), 0, defaultTZ); } } - private record ShortTimeWithTimeZoneRange(long low, long high, long factor, int offsetMinutes) + private record ShortTimeWithTimeZoneRange(long low, long high, long factor, int offsetMinutes, long step) { static ShortTimeWithTimeZoneRange of(Range range, int precision) + { + return of(range, precision, 1); + } + + static ShortTimeWithTimeZoneRange of(Range range, int precision, long step) { int offsetMinutes = range.getLowValue() .map(v -> unpackOffsetMinutes((long) v)) @@ -714,13 +936,24 @@ static ShortTimeWithTimeZoneRange of(Range range, int precision) long factor = POWERS_OF_TEN[9 - precision]; long low = roundDiv(range.getLowValue().map(v -> unpackTimeNanos((long) v)).orElse(0L), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); long high = roundDiv(range.getHighValue().map(v -> unpackTimeNanos((long) v)).orElse(NANOSECONDS_PER_DAY), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); - return new ShortTimeWithTimeZoneRange(low, high, factor, offsetMinutes); + return new ShortTimeWithTimeZoneRange(low, high, factor, offsetMinutes, step); + } + + long at(long index) + { + long nanos = low + roundDiv(index * step, factor); + return packTimeWithTimeZone((step > 0 ? Math.min(nanos, high - 1) : Math.max(nanos, high - 1)) * factor, offsetMinutes); } } - private record LongTimeWithTimeZoneRange(long low, long high, int factor, int offsetMinutes) + private record LongTimeWithTimeZoneRange(long low, long high, int factor, int offsetMinutes, long step) { static LongTimeWithTimeZoneRange of(Range range, int precision) + { + return of(range, precision, 1); + } + + static LongTimeWithTimeZoneRange of(Range range, int precision, long step) { int offsetMinutes = range.getLowValue() .map(v -> ((LongTimeWithTimeZone) v).getOffsetMinutes()) @@ -735,7 +968,13 @@ static LongTimeWithTimeZoneRange of(Range range, int precision) int factor = (int) POWERS_OF_TEN[12 - precision]; long longLow = roundDiv(low.getPicoseconds(), factor) + (!range.isLowUnbounded() && !range.isLowInclusive() ? 1 : 0); long longHigh = roundDiv(high.getPicoseconds(), factor) + (!range.isHighUnbounded() && range.isHighInclusive() ? 1 : 0); - return new LongTimeWithTimeZoneRange(longLow, longHigh, factor, offsetMinutes); + return new LongTimeWithTimeZoneRange(longLow, longHigh, factor, offsetMinutes, step); + } + + LongTimeWithTimeZone at(long index) + { + long picoseconds = low + roundDiv(index * step, factor); + return new LongTimeWithTimeZone((step > 0 ? Math.min(picoseconds, high - 1) : Math.max(picoseconds, high - 1)) * factor, offsetMinutes); } } @@ -816,6 +1055,12 @@ private Generator generateUUID(Range range) }; } + @FunctionalInterface + private interface SequenceWriter + { + void accept(BlockBuilder blockBuilder, long rowId); + } + @FunctionalInterface private interface ObjectWriter { diff --git a/plugin/trino-faker/src/test/java/io/trino/plugin/faker/TestFakerQueries.java b/plugin/trino-faker/src/test/java/io/trino/plugin/faker/TestFakerQueries.java index 503cfbed8319..4861a9ece0c8 100644 --- a/plugin/trino-faker/src/test/java/io/trino/plugin/faker/TestFakerQueries.java +++ b/plugin/trino-faker/src/test/java/io/trino/plugin/faker/TestFakerQueries.java @@ -385,6 +385,51 @@ void testSelectValuesProperty() } } + @Test + void testSelectStepProperties() + { + // small step in small ranges that produce only 10 unique values for 1000 rows + List testCases = ImmutableList.builder() + .add(new TestDataType("rnd_bigint", "bigint", Map.of("min", "0", "max", "9", "step", "1"), "count(distinct rnd_bigint)", "10")) + .add(new TestDataType("rnd_integer", "integer", Map.of("min", "0", "max", "9", "step", "1"), "count(distinct rnd_integer)", "10")) + .add(new TestDataType("rnd_smallint", "smallint", Map.of("min", "0", "max", "9", "step", "1"), "count(distinct rnd_smallint)", "10")) + .add(new TestDataType("rnd_tinyint", "tinyint", Map.of("min", "0", "max", "9", "step", "1"), "count(distinct rnd_tinyint)", "10")) + .add(new TestDataType("rnd_date", "date", Map.of("min", "2022-03-01", "max", "2022-03-10", "step", "1d"), "count(distinct rnd_date)", "10")) + .add(new TestDataType("rnd_decimal1", "decimal", Map.of("min", "0", "max", "9", "step", "1"), "count(distinct rnd_decimal1)", "10")) + .add(new TestDataType("rnd_decimal2", "decimal(18,5)", Map.of("min", "0.00000", "max", "0.00009", "step", "0.00001"), "count(distinct rnd_decimal2)", "10")) + .add(new TestDataType("rnd_decimal3", "decimal(38,0)", Map.of("min", "0", "max", "9", "step", "1"), "count(distinct rnd_decimal3)", "10")) + .add(new TestDataType("rnd_decimal4", "decimal(38,38)", Map.of("min", "0.00000000000000000000000000000000000000", "max", "0.00000000000000000000000000000000000009", "step", "0.00000000000000000000000000000000000001"), "count(distinct rnd_decimal4)", "10")) + .add(new TestDataType("rnd_decimal5", "decimal(5,2)", Map.of("min", "0.00", "max", "1.09", "step", "0.01"), "count(distinct rnd_decimal5)", "110")) + .add(new TestDataType("rnd_real", "real", Map.of("min", "0.0", "max", "1.3E-44", "step", "1.4E-45"), "count(distinct rnd_real)", "10")) + .add(new TestDataType("rnd_double", "double", Map.of("min", "0.0", "max", "4.4E-323", "step", "4.9E-324"), "count(distinct rnd_double)", "10")) + .add(new TestDataType("rnd_interval1", "interval day to second", Map.of("min", "0.000", "max", "0.009", "step", "0.001"), "count(distinct rnd_interval1)", "10")) + .add(new TestDataType("rnd_interval2", "interval year to month", Map.of("min", "0", "max", "9", "step", "1"), "count(distinct rnd_interval2)", "10")) + .add(new TestDataType("rnd_timestamp", "timestamp", Map.of("min", "2022-03-21 00:00:00.000", "max", "2022-03-21 00:00:00.009", "step", "1ms"), "count(distinct rnd_timestamp)", "10")) + .add(new TestDataType("rnd_timestamp0", "timestamp(0)", Map.of("min", "2022-03-21 00:00:00", "max", "2022-03-21 00:00:09", "step", "1s"), "count(distinct rnd_timestamp0)", "10")) + .add(new TestDataType("rnd_timestamp6", "timestamp(6)", Map.of("min", "2022-03-21 00:00:00.000000", "max", "2022-03-21 00:00:00.000009", "step", "1us"), "count(distinct rnd_timestamp6)", "10")) + .add(new TestDataType("rnd_timestamp9", "timestamp(9)", Map.of("min", "2022-03-21 00:00:00.000000000", "max", "2022-03-21 00:00:00.000009000", "step", "1us"), "count(distinct rnd_timestamp9)", "10")) + .add(new TestDataType("rnd_timestamptz", "timestamp with time zone", Map.of("min", "2022-03-21 00:00:00.000 +01:00", "max", "2022-03-21 00:00:00.009 +01:00", "step", "1ms"), "count(distinct rnd_timestamptz)", "10")) + .add(new TestDataType("rnd_timestamptz0", "timestamp(0) with time zone", Map.of("min", "2022-03-21 00:00:00 +01:00", "max", "2022-03-21 00:00:09 +01:00", "step", "1s"), "count(distinct rnd_timestamptz0)", "10")) + .add(new TestDataType("rnd_timestamptz6", "timestamp(6) with time zone", Map.of("min", "2022-03-21 00:00:00.000000 +01:00", "max", "2022-03-21 00:00:00.009000 +01:00", "step", "1ms"), "count(distinct rnd_timestamptz6)", "10")) + .add(new TestDataType("rnd_timestamptz9", "timestamp(9) with time zone", Map.of("min", "2022-03-21 00:00:00.000000000 +01:00", "max", "2022-03-21 00:00:00.009000000 +01:00", "step", "1ms"), "count(distinct rnd_timestamptz9)", "10")) + .add(new TestDataType("rnd_time", "time", Map.of("min", "01:02:03.456", "max", "01:02:03.465", "step", "1ms"), "count(distinct rnd_time)", "10")) + .add(new TestDataType("rnd_time0", "time(0)", Map.of("min", "01:02:03", "max", "01:02:12", "step", "1s"), "count(distinct rnd_time0)", "10")) + .add(new TestDataType("rnd_time6", "time(6)", Map.of("min", "01:02:03.000456", "max", "01:02:03.000465", "step", "1us"), "count(distinct rnd_time6)", "10")) + .add(new TestDataType("rnd_time9", "time(9)", Map.of("min", "01:02:03.000000456", "max", "01:02:03.000000465", "step", "1ns"), "count(distinct rnd_time9)", "10")) + .add(new TestDataType("rnd_timetz", "time with time zone", Map.of("min", "01:02:03.456 +01:00", "max", "01:02:03.465 +01:00", "step", "1ms"), "count(distinct rnd_timetz)", "10")) + .add(new TestDataType("rnd_timetz0", "time(0) with time zone", Map.of("min", "01:02:03 +01:00", "max", "01:02:12 +01:00", "step", "1s"), "count(distinct rnd_timetz0)", "10")) + .add(new TestDataType("rnd_timetz6", "time(6) with time zone", Map.of("min", "01:02:03.000456 +01:00", "max", "01:02:03.000465 +01:00", "step", "1us"), "count(distinct rnd_timetz6)", "10")) + .add(new TestDataType("rnd_timetz9", "time(9) with time zone", Map.of("min", "01:02:03.000000456 +01:00", "max", "01:02:03.000000465 +01:00", "step", "1ns"), "count(distinct rnd_timetz9)", "10")) + .add(new TestDataType("rnd_timetz12", "time(12) with time zone", Map.of("min", "01:02:03.000000000456 +01:00", "max", "01:02:03.000000009456 +01:00", "step", "1ns"), "count(distinct rnd_timetz12)", "10")) + .build(); + + for (TestDataType testCase : testCases) { + try (TestTable table = new TestTable(getQueryRunner()::execute, "step_small_" + testCase.name(), "(%s)".formatted(testCase.columnSchema()))) { + assertQuery("SELECT %s FROM %s".formatted(testCase.queryExpression(), table.getName()), "VALUES (%s)".formatted(testCase.expectedValue())); + } + } + } + private record TestDataType(String name, String type, Map properties, String queryExpression, String expectedValue) { public TestDataType(String name, String type, String queryExpression, String expectedValue)