diff --git a/docs/src/main/sphinx/connector/delta-lake.rst b/docs/src/main/sphinx/connector/delta-lake.rst index 7acfb3de8a16..bfd9e92c35fe 100644 --- a/docs/src/main/sphinx/connector/delta-lake.rst +++ b/docs/src/main/sphinx/connector/delta-lake.rst @@ -195,10 +195,6 @@ The following table describes :ref:`catalog session properties * - Property name - Description - Default - * - ``parquet_optimized_reader_enabled`` - - Specifies whether batched column readers are used when reading Parquet - files for improved performance. - - ``true`` * - ``parquet_max_read_block_size`` - The maximum block size used when reading Parquet files. - ``16MB`` @@ -1138,19 +1134,6 @@ connector. - Sets the maximum number of rows read in a batch. The equivalent catalog session property is ``parquet_max_read_block_row_count``. - ``8192`` - * - ``parquet.optimized-reader.enabled`` - - Specifies whether batched column readers are used when reading Parquet - files for improved performance. Set this property to ``false`` to - disable the optimized parquet reader by default. The equivalent catalog - session property is ``parquet_optimized_reader_enabled``. - - ``true`` - * - ``parquet.optimized-nested-reader.enabled`` - - Specifies whether batched column readers are used when reading ARRAY, - MAP, and ROW types from Parquet files for improved performance. Set this - property to ``false`` to disable the optimized parquet reader by default - for structural data types. The equivalent catalog session property is - ``parquet_optimized_nested_reader_enabled``. - - ``true`` * - ``parquet.use-column-index`` - Skip reading Parquet pages by using Parquet column indices. The equivalent catalog session property is ``parquet_use_column_index``. diff --git a/docs/src/main/sphinx/connector/hive.rst b/docs/src/main/sphinx/connector/hive.rst index 9bf053e6c333..49202742f114 100644 --- a/docs/src/main/sphinx/connector/hive.rst +++ b/docs/src/main/sphinx/connector/hive.rst @@ -1660,12 +1660,6 @@ with Parquet files performed by the Hive connector. definition. The equivalent catalog session property is ``parquet_use_column_names``. - ``true`` - * - ``parquet.optimized-reader.enabled`` - - Whether batched column readers are used when reading Parquet files - for improved performance. Set this property to ``false`` to disable the - optimized parquet reader by default. The equivalent catalog session - property is ``parquet_optimized_reader_enabled``. - - ``true`` * - ``parquet.writer.validation-percentage`` - Percentage of Parquet files to validate after write by re-reading the whole file. The equivalent catalog session property is ``parquet_optimized_writer_validation_percentage``. @@ -1689,13 +1683,6 @@ with Parquet files performed by the Hive connector. * - ``parquet.max-read-block-row-count`` - Sets the maximum number of rows read in a batch. - ``8192`` - * - ``parquet.optimized-nested-reader.enabled`` - - Whether batched column readers should be used when reading ARRAY, MAP - and ROW types from Parquet files for improved performance. Set this - property to ``false`` to disable the optimized parquet reader by default - for structural data types. The equivalent catalog session property is - ``parquet_optimized_nested_reader_enabled``. - - ``true`` Hive 3-related limitations -------------------------- diff --git a/docs/src/main/sphinx/connector/hudi.rst b/docs/src/main/sphinx/connector/hudi.rst index fb49b3b63227..d5561d9a7bea 100644 --- a/docs/src/main/sphinx/connector/hudi.rst +++ b/docs/src/main/sphinx/connector/hudi.rst @@ -57,19 +57,6 @@ Additionally, following configuration properties can be set depending on the use - Access Parquet columns using names from the file. If disabled, then columns are accessed using the index. Only applicable to Parquet file format. - ``true`` - * - ``parquet.optimized-reader.enabled`` - - Whether batched column readers must be used when reading Parquet files - for improved performance. Set this property to ``false`` to disable the - optimized parquet reader by default. The equivalent catalog session - property is ``parquet_optimized_reader_enabled``. - - ``true`` - * - ``parquet.optimized-nested-reader.enabled`` - - Whether batched column readers must be used when reading ARRAY, MAP - and ROW types from Parquet files for improved performance. Set this - property to ``false`` to disable the optimized parquet reader by default - for structural data types. The equivalent catalog session property is - ``parquet_optimized_nested_reader_enabled``. - - ``true`` * - ``hudi.split-generator-parallelism`` - Number of threads to generate splits from partitions. - ``4`` diff --git a/docs/src/main/sphinx/connector/iceberg.rst b/docs/src/main/sphinx/connector/iceberg.rst index fe72ce9e9e2d..48d1ba3ef1d2 100644 --- a/docs/src/main/sphinx/connector/iceberg.rst +++ b/docs/src/main/sphinx/connector/iceberg.rst @@ -1595,19 +1595,6 @@ with Parquet files performed by the Iceberg connector. * - ``parquet.max-read-block-row-count`` - Sets the maximum number of rows read in a batch. - ``8192`` - * - ``parquet.optimized-reader.enabled`` - - Whether batched column readers are used when reading Parquet files for - improved performance. Set this property to ``false`` to disable the - optimized parquet reader by default. The equivalent catalog session - property is ``parquet_optimized_reader_enabled``. - - ``true`` - * - ``parquet.optimized-nested-reader.enabled`` - - Whether batched column readers are used when reading ARRAY, MAP, and ROW - types from Parquet files for improved performance. Set this property to - ``false`` to disable the optimized parquet reader by default for - structural data types. The equivalent catalog session property is - ``parquet_optimized_nested_reader_enabled``. - - ``true`` * - ``parquet.use-bloom-filter`` - Whether bloom filters are used for predicate pushdown when reading Parquet files. Set this property to ``false`` to disable the usage of diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/BinaryColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/BinaryColumnReader.java deleted file mode 100644 index 89f7ce729dec..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/BinaryColumnReader.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.airlift.slice.Slice; -import io.trino.parquet.PrimitiveField; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.CharType; -import io.trino.spi.type.Type; -import io.trino.spi.type.VarcharType; -import org.apache.parquet.io.api.Binary; - -import static io.airlift.slice.Slices.EMPTY_SLICE; -import static io.airlift.slice.Slices.wrappedBuffer; -import static io.trino.spi.type.Chars.truncateToLengthAndTrimSpaces; -import static io.trino.spi.type.Varchars.truncateToLength; - -public class BinaryColumnReader - extends PrimitiveColumnReader -{ - public BinaryColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - Binary binary = valuesReader.readBytes(); - Slice value; - if (binary.length() == 0) { - value = EMPTY_SLICE; - } - else { - value = wrappedBuffer(binary.getBytes()); - } - if (type instanceof VarcharType) { - value = truncateToLength(value, type); - } - if (type instanceof CharType) { - value = truncateToLengthAndTrimSpaces(value, type); - } - type.writeSlice(blockBuilder, value); - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/BooleanColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/BooleanColumnReader.java deleted file mode 100644 index bf215cfc4715..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/BooleanColumnReader.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.Type; - -public class BooleanColumnReader - extends PrimitiveColumnReader -{ - public BooleanColumnReader(PrimitiveField primitiveField) - { - super(primitiveField); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - type.writeBoolean(blockBuilder, valuesReader.readBoolean()); - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ColumnReaderFactory.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ColumnReaderFactory.java index 2a457f97d302..7997149bc0e0 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ColumnReaderFactory.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ColumnReaderFactory.java @@ -15,7 +15,6 @@ import io.trino.memory.context.AggregatedMemoryContext; import io.trino.memory.context.LocalMemoryContext; -import io.trino.parquet.ParquetReaderOptions; import io.trino.parquet.PrimitiveField; import io.trino.parquet.reader.decoders.ValueDecoder; import io.trino.parquet.reader.decoders.ValueDecoders; @@ -48,7 +47,6 @@ import java.util.Optional; import static io.trino.parquet.ParquetEncoding.PLAIN; -import static io.trino.parquet.ParquetTypeUtils.createDecimalType; import static io.trino.parquet.reader.decoders.ValueDecoder.ValueDecodersProvider; import static io.trino.parquet.reader.flat.BinaryColumnAdapter.BINARY_ADAPTER; import static io.trino.parquet.reader.flat.ByteColumnAdapter.BYTE_ADAPTER; @@ -76,7 +74,6 @@ import static java.util.Objects.requireNonNull; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; -import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT; @@ -87,14 +84,10 @@ public final class ColumnReaderFactory { private final DateTimeZone timeZone; - private final boolean useBatchColumnReaders; - private final boolean useBatchNestedColumnReaders; - public ColumnReaderFactory(DateTimeZone timeZone, ParquetReaderOptions readerOptions) + public ColumnReaderFactory(DateTimeZone timeZone) { this.timeZone = requireNonNull(timeZone, "dateTimeZone is null"); - this.useBatchColumnReaders = readerOptions.useBatchColumnReaders(); - this.useBatchNestedColumnReaders = readerOptions.useBatchNestedColumnReaders(); } public ColumnReader create(PrimitiveField field, AggregatedMemoryContext aggregatedMemoryContext) @@ -103,241 +96,180 @@ public ColumnReader create(PrimitiveField field, AggregatedMemoryContext aggrega PrimitiveTypeName primitiveType = field.getDescriptor().getPrimitiveType().getPrimitiveTypeName(); LogicalTypeAnnotation annotation = field.getDescriptor().getPrimitiveType().getLogicalTypeAnnotation(); LocalMemoryContext memoryContext = aggregatedMemoryContext.newLocalMemoryContext(ColumnReader.class.getSimpleName()); - if (useBatchedColumnReaders(field)) { - ValueDecoders valueDecoders = new ValueDecoders(field); - if (BOOLEAN.equals(type) && primitiveType == PrimitiveTypeName.BOOLEAN) { - return createColumnReader(field, valueDecoders::getBooleanDecoder, BYTE_ADAPTER, memoryContext); + ValueDecoders valueDecoders = new ValueDecoders(field); + if (BOOLEAN.equals(type) && primitiveType == PrimitiveTypeName.BOOLEAN) { + return createColumnReader(field, valueDecoders::getBooleanDecoder, BYTE_ADAPTER, memoryContext); + } + if (TINYINT.equals(type) && isIntegerOrDecimalPrimitive(primitiveType)) { + if (isZeroScaleShortDecimalAnnotation(annotation)) { + return createColumnReader(field, valueDecoders::getShortDecimalToByteDecoder, BYTE_ADAPTER, memoryContext); } - if (TINYINT.equals(type) && isIntegerOrDecimalPrimitive(primitiveType)) { - if (isZeroScaleShortDecimalAnnotation(annotation)) { - return createColumnReader(field, valueDecoders::getShortDecimalToByteDecoder, BYTE_ADAPTER, memoryContext); - } - if (!isIntegerAnnotationAndPrimitive(annotation, primitiveType)) { - throw unsupportedException(type, field); - } - return createColumnReader(field, valueDecoders::getByteDecoder, BYTE_ADAPTER, memoryContext); + if (!isIntegerAnnotationAndPrimitive(annotation, primitiveType)) { + throw unsupportedException(type, field); } - if (SMALLINT.equals(type) && isIntegerOrDecimalPrimitive(primitiveType)) { - if (isZeroScaleShortDecimalAnnotation(annotation)) { - return createColumnReader(field, valueDecoders::getShortDecimalToShortDecoder, SHORT_ADAPTER, memoryContext); - } - if (!isIntegerAnnotationAndPrimitive(annotation, primitiveType)) { - throw unsupportedException(type, field); - } - return createColumnReader(field, valueDecoders::getShortDecoder, SHORT_ADAPTER, memoryContext); + return createColumnReader(field, valueDecoders::getByteDecoder, BYTE_ADAPTER, memoryContext); + } + if (SMALLINT.equals(type) && isIntegerOrDecimalPrimitive(primitiveType)) { + if (isZeroScaleShortDecimalAnnotation(annotation)) { + return createColumnReader(field, valueDecoders::getShortDecimalToShortDecoder, SHORT_ADAPTER, memoryContext); } - if (DATE.equals(type) && primitiveType == INT32) { - if (annotation == null || annotation instanceof DateLogicalTypeAnnotation) { - return createColumnReader(field, valueDecoders::getIntDecoder, INT_ADAPTER, memoryContext); - } + if (!isIntegerAnnotationAndPrimitive(annotation, primitiveType)) { throw unsupportedException(type, field); } - if (type instanceof AbstractIntType && isIntegerOrDecimalPrimitive(primitiveType)) { - if (isZeroScaleShortDecimalAnnotation(annotation)) { - return createColumnReader(field, valueDecoders::getShortDecimalToIntDecoder, INT_ADAPTER, memoryContext); - } - if (!isIntegerAnnotationAndPrimitive(annotation, primitiveType)) { - throw unsupportedException(type, field); - } + return createColumnReader(field, valueDecoders::getShortDecoder, SHORT_ADAPTER, memoryContext); + } + if (DATE.equals(type) && primitiveType == INT32) { + if (annotation == null || annotation instanceof DateLogicalTypeAnnotation) { return createColumnReader(field, valueDecoders::getIntDecoder, INT_ADAPTER, memoryContext); } - if (type instanceof TimeType) { - if (!(annotation instanceof TimeLogicalTypeAnnotation timeAnnotation)) { - throw unsupportedException(type, field); - } - if (primitiveType == INT64 && timeAnnotation.getUnit() == MICROS) { - return createColumnReader(field, valueDecoders::getTimeMicrosDecoder, LONG_ADAPTER, memoryContext); - } - if (primitiveType == INT32 && timeAnnotation.getUnit() == MILLIS) { - return createColumnReader(field, valueDecoders::getTimeMillisDecoder, LONG_ADAPTER, memoryContext); - } + throw unsupportedException(type, field); + } + if (type instanceof AbstractIntType && isIntegerOrDecimalPrimitive(primitiveType)) { + if (isZeroScaleShortDecimalAnnotation(annotation)) { + return createColumnReader(field, valueDecoders::getShortDecimalToIntDecoder, INT_ADAPTER, memoryContext); + } + if (!isIntegerAnnotationAndPrimitive(annotation, primitiveType)) { throw unsupportedException(type, field); } - if (BIGINT.equals(type) && primitiveType == INT64 - && (annotation instanceof TimestampLogicalTypeAnnotation || annotation instanceof TimeLogicalTypeAnnotation)) { - return createColumnReader(field, valueDecoders::getLongDecoder, LONG_ADAPTER, memoryContext); + return createColumnReader(field, valueDecoders::getIntDecoder, INT_ADAPTER, memoryContext); + } + if (type instanceof TimeType) { + if (!(annotation instanceof TimeLogicalTypeAnnotation timeAnnotation)) { + throw unsupportedException(type, field); + } + if (primitiveType == INT64 && timeAnnotation.getUnit() == MICROS) { + return createColumnReader(field, valueDecoders::getTimeMicrosDecoder, LONG_ADAPTER, memoryContext); + } + if (primitiveType == INT32 && timeAnnotation.getUnit() == MILLIS) { + return createColumnReader(field, valueDecoders::getTimeMillisDecoder, LONG_ADAPTER, memoryContext); + } + throw unsupportedException(type, field); + } + if (BIGINT.equals(type) && primitiveType == INT64 + && (annotation instanceof TimestampLogicalTypeAnnotation || annotation instanceof TimeLogicalTypeAnnotation)) { + return createColumnReader(field, valueDecoders::getLongDecoder, LONG_ADAPTER, memoryContext); + } + if (type instanceof AbstractLongType && isIntegerOrDecimalPrimitive(primitiveType)) { + if (isZeroScaleShortDecimalAnnotation(annotation)) { + return createColumnReader(field, valueDecoders::getShortDecimalDecoder, LONG_ADAPTER, memoryContext); } - if (type instanceof AbstractLongType && isIntegerOrDecimalPrimitive(primitiveType)) { - if (isZeroScaleShortDecimalAnnotation(annotation)) { - return createColumnReader(field, valueDecoders::getShortDecimalDecoder, LONG_ADAPTER, memoryContext); - } - if (!isIntegerAnnotationAndPrimitive(annotation, primitiveType)) { - throw unsupportedException(type, field); - } - if (primitiveType == INT32) { - return createColumnReader(field, valueDecoders::getInt32ToLongDecoder, LONG_ADAPTER, memoryContext); - } - if (primitiveType == INT64) { - return createColumnReader(field, valueDecoders::getLongDecoder, LONG_ADAPTER, memoryContext); - } + if (!isIntegerAnnotationAndPrimitive(annotation, primitiveType)) { + throw unsupportedException(type, field); + } + if (primitiveType == INT32) { + return createColumnReader(field, valueDecoders::getInt32ToLongDecoder, LONG_ADAPTER, memoryContext); + } + if (primitiveType == INT64) { + return createColumnReader(field, valueDecoders::getLongDecoder, LONG_ADAPTER, memoryContext); } - if (REAL.equals(type) && primitiveType == FLOAT) { - return createColumnReader(field, valueDecoders::getRealDecoder, INT_ADAPTER, memoryContext); + } + if (REAL.equals(type) && primitiveType == FLOAT) { + return createColumnReader(field, valueDecoders::getRealDecoder, INT_ADAPTER, memoryContext); + } + if (DOUBLE.equals(type)) { + if (primitiveType == PrimitiveTypeName.DOUBLE) { + return createColumnReader(field, valueDecoders::getDoubleDecoder, LONG_ADAPTER, memoryContext); } - if (DOUBLE.equals(type)) { - if (primitiveType == PrimitiveTypeName.DOUBLE) { - return createColumnReader(field, valueDecoders::getDoubleDecoder, LONG_ADAPTER, memoryContext); - } - if (primitiveType == FLOAT) { - return createColumnReader(field, valueDecoders::getFloatToDoubleDecoder, LONG_ADAPTER, memoryContext); - } + if (primitiveType == FLOAT) { + return createColumnReader(field, valueDecoders::getFloatToDoubleDecoder, LONG_ADAPTER, memoryContext); } - if (type instanceof TimestampType timestampType && primitiveType == INT96) { - if (timestampType.isShort()) { - return createColumnReader( - field, - (encoding) -> valueDecoders.getInt96ToShortTimestampDecoder(encoding, timeZone), - LONG_ADAPTER, - memoryContext); - } + } + if (type instanceof TimestampType timestampType && primitiveType == INT96) { + if (timestampType.isShort()) { return createColumnReader( field, - (encoding) -> valueDecoders.getInt96ToLongTimestampDecoder(encoding, timeZone), - FIXED12_ADAPTER, + (encoding) -> valueDecoders.getInt96ToShortTimestampDecoder(encoding, timeZone), + LONG_ADAPTER, memoryContext); } - if (type instanceof TimestampWithTimeZoneType timestampWithTimeZoneType && primitiveType == INT96) { - if (timestampWithTimeZoneType.isShort()) { - return createColumnReader(field, valueDecoders::getInt96ToShortTimestampWithTimeZoneDecoder, LONG_ADAPTER, memoryContext); - } + return createColumnReader( + field, + (encoding) -> valueDecoders.getInt96ToLongTimestampDecoder(encoding, timeZone), + FIXED12_ADAPTER, + memoryContext); + } + if (type instanceof TimestampWithTimeZoneType timestampWithTimeZoneType && primitiveType == INT96) { + if (timestampWithTimeZoneType.isShort()) { + return createColumnReader(field, valueDecoders::getInt96ToShortTimestampWithTimeZoneDecoder, LONG_ADAPTER, memoryContext); + } + throw unsupportedException(type, field); + } + if (type instanceof TimestampType timestampType && primitiveType == INT64) { + if (!(annotation instanceof TimestampLogicalTypeAnnotation timestampAnnotation)) { throw unsupportedException(type, field); } - if (type instanceof TimestampType timestampType && primitiveType == INT64) { - if (!(annotation instanceof TimestampLogicalTypeAnnotation timestampAnnotation)) { - throw unsupportedException(type, field); - } - if (timestampType.isShort()) { - return switch (timestampAnnotation.getUnit()) { - case MILLIS -> createColumnReader(field, valueDecoders::getInt64TimestampMillsToShortTimestampDecoder, LONG_ADAPTER, memoryContext); - case MICROS -> createColumnReader(field, valueDecoders::getInt64TimestampMicrosToShortTimestampDecoder, LONG_ADAPTER, memoryContext); - case NANOS -> createColumnReader(field, valueDecoders::getInt64TimestampNanosToShortTimestampDecoder, LONG_ADAPTER, memoryContext); - }; - } + if (timestampType.isShort()) { return switch (timestampAnnotation.getUnit()) { - case MILLIS -> createColumnReader(field, valueDecoders::getInt64TimestampMillisToLongTimestampDecoder, FIXED12_ADAPTER, memoryContext); - case MICROS -> createColumnReader(field, valueDecoders::getInt64TimestampMicrosToLongTimestampDecoder, FIXED12_ADAPTER, memoryContext); - case NANOS -> createColumnReader(field, valueDecoders::getInt64TimestampNanosToLongTimestampDecoder, FIXED12_ADAPTER, memoryContext); + case MILLIS -> createColumnReader(field, valueDecoders::getInt64TimestampMillsToShortTimestampDecoder, LONG_ADAPTER, memoryContext); + case MICROS -> createColumnReader(field, valueDecoders::getInt64TimestampMicrosToShortTimestampDecoder, LONG_ADAPTER, memoryContext); + case NANOS -> createColumnReader(field, valueDecoders::getInt64TimestampNanosToShortTimestampDecoder, LONG_ADAPTER, memoryContext); }; } - if (type instanceof TimestampWithTimeZoneType timestampWithTimeZoneType && primitiveType == INT64) { - if (!(annotation instanceof TimestampLogicalTypeAnnotation timestampAnnotation)) { - throw unsupportedException(type, field); - } - if (timestampWithTimeZoneType.isShort()) { - return switch (timestampAnnotation.getUnit()) { - case MILLIS -> createColumnReader(field, valueDecoders::getInt64TimestampMillsToShortTimestampWithTimeZoneDecoder, LONG_ADAPTER, memoryContext); - case MICROS -> createColumnReader(field, valueDecoders::getInt64TimestampMicrosToShortTimestampWithTimeZoneDecoder, LONG_ADAPTER, memoryContext); - case NANOS -> throw unsupportedException(type, field); - }; - } + return switch (timestampAnnotation.getUnit()) { + case MILLIS -> createColumnReader(field, valueDecoders::getInt64TimestampMillisToLongTimestampDecoder, FIXED12_ADAPTER, memoryContext); + case MICROS -> createColumnReader(field, valueDecoders::getInt64TimestampMicrosToLongTimestampDecoder, FIXED12_ADAPTER, memoryContext); + case NANOS -> createColumnReader(field, valueDecoders::getInt64TimestampNanosToLongTimestampDecoder, FIXED12_ADAPTER, memoryContext); + }; + } + if (type instanceof TimestampWithTimeZoneType timestampWithTimeZoneType && primitiveType == INT64) { + if (!(annotation instanceof TimestampLogicalTypeAnnotation timestampAnnotation)) { + throw unsupportedException(type, field); + } + if (timestampWithTimeZoneType.isShort()) { return switch (timestampAnnotation.getUnit()) { - case MILLIS, NANOS -> throw unsupportedException(type, field); - case MICROS -> createColumnReader(field, valueDecoders::getInt64TimestampMicrosToLongTimestampWithTimeZoneDecoder, FIXED12_ADAPTER, memoryContext); + case MILLIS -> createColumnReader(field, valueDecoders::getInt64TimestampMillsToShortTimestampWithTimeZoneDecoder, LONG_ADAPTER, memoryContext); + case MICROS -> createColumnReader(field, valueDecoders::getInt64TimestampMicrosToShortTimestampWithTimeZoneDecoder, LONG_ADAPTER, memoryContext); + case NANOS -> throw unsupportedException(type, field); }; } - if (type instanceof DecimalType decimalType && decimalType.isShort() - && isIntegerOrDecimalPrimitive(primitiveType)) { - if (primitiveType == INT32 && isIntegerAnnotation(annotation)) { - return createColumnReader(field, valueDecoders::getInt32ToShortDecimalDecoder, LONG_ADAPTER, memoryContext); - } - if (!(annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation)) { - throw unsupportedException(type, field); - } - if (isDecimalRescaled(decimalAnnotation, decimalType)) { - return createColumnReader(field, valueDecoders::getRescaledShortDecimalDecoder, LONG_ADAPTER, memoryContext); - } - return createColumnReader(field, valueDecoders::getShortDecimalDecoder, LONG_ADAPTER, memoryContext); - } - if (type instanceof DecimalType decimalType && !decimalType.isShort() - && isIntegerOrDecimalPrimitive(primitiveType)) { - if (!(annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation)) { - throw unsupportedException(type, field); - } - if (isDecimalRescaled(decimalAnnotation, decimalType)) { - return createColumnReader(field, valueDecoders::getRescaledLongDecimalDecoder, INT128_ADAPTER, memoryContext); - } - return createColumnReader(field, valueDecoders::getLongDecimalDecoder, INT128_ADAPTER, memoryContext); - } - if (type instanceof VarcharType varcharType && !varcharType.isUnbounded() && primitiveType == BINARY) { - return createColumnReader(field, valueDecoders::getBoundedVarcharBinaryDecoder, BINARY_ADAPTER, memoryContext); - } - if (type instanceof CharType && primitiveType == BINARY) { - return createColumnReader(field, valueDecoders::getCharBinaryDecoder, BINARY_ADAPTER, memoryContext); + return switch (timestampAnnotation.getUnit()) { + case MILLIS, NANOS -> throw unsupportedException(type, field); + case MICROS -> createColumnReader(field, valueDecoders::getInt64TimestampMicrosToLongTimestampWithTimeZoneDecoder, FIXED12_ADAPTER, memoryContext); + }; + } + if (type instanceof DecimalType decimalType && decimalType.isShort() + && isIntegerOrDecimalPrimitive(primitiveType)) { + if (primitiveType == INT32 && isIntegerAnnotation(annotation)) { + return createColumnReader(field, valueDecoders::getInt32ToShortDecimalDecoder, LONG_ADAPTER, memoryContext); } - if (type instanceof AbstractVariableWidthType && primitiveType == BINARY) { - return createColumnReader(field, valueDecoders::getBinaryDecoder, BINARY_ADAPTER, memoryContext); + if (!(annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation)) { + throw unsupportedException(type, field); } - if ((VARBINARY.equals(type) || VARCHAR.equals(type)) && primitiveType == FIXED_LEN_BYTE_ARRAY) { - return createColumnReader(field, valueDecoders::getFixedWidthBinaryDecoder, BINARY_ADAPTER, memoryContext); + if (isDecimalRescaled(decimalAnnotation, decimalType)) { + return createColumnReader(field, valueDecoders::getRescaledShortDecimalDecoder, LONG_ADAPTER, memoryContext); } - if (UUID.equals(type) && primitiveType == FIXED_LEN_BYTE_ARRAY) { - // Iceberg 0.11.1 writes UUID as FIXED_LEN_BYTE_ARRAY without logical type annotation (see https://github.com/apache/iceberg/pull/2913) - // To support such files, we bet on the logical type to be UUID based on the Trino UUID type check. - if (annotation == null || isLogicalUuid(annotation)) { - return createColumnReader(field, valueDecoders::getUuidDecoder, INT128_ADAPTER, memoryContext); - } + return createColumnReader(field, valueDecoders::getShortDecimalDecoder, LONG_ADAPTER, memoryContext); + } + if (type instanceof DecimalType decimalType && !decimalType.isShort() + && isIntegerOrDecimalPrimitive(primitiveType)) { + if (!(annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation)) { throw unsupportedException(type, field); } - throw new TrinoException( - NOT_SUPPORTED, - format("Reading Trino column (%s) from Parquet column (%s) is not supported by optimized parquet reader", type, field.getDescriptor())); - } - - return switch (primitiveType) { - case BOOLEAN -> new BooleanColumnReader(field); - case INT32 -> createDecimalColumnReader(field).orElseGet(() -> { - if (type instanceof DecimalType decimalType && decimalType.isShort()) { - return new Int32ShortDecimalColumnReader(field); - } - return new IntColumnReader(field); - }); - case INT64 -> { - if (annotation instanceof TimeLogicalTypeAnnotation timeAnnotation) { - if (field.getType() instanceof TimeType && timeAnnotation.getUnit() == MICROS) { - yield new TimeMicrosColumnReader(field); - } - else if (BIGINT.equals(field.getType())) { - yield new LongColumnReader(field); - } - throw unsupportedException(type, field); - } - if (annotation instanceof TimestampLogicalTypeAnnotation timestampAnnotation) { - if (timestampAnnotation.getUnit() == MILLIS) { - yield new Int64TimestampMillisColumnReader(field); - } - if (timestampAnnotation.getUnit() == MICROS) { - yield new TimestampMicrosColumnReader(field); - } - if (timestampAnnotation.getUnit() == NANOS) { - yield new Int64TimestampNanosColumnReader(field); - } - throw unsupportedException(type, field); - } - yield createDecimalColumnReader(field).orElse(new LongColumnReader(field)); + if (isDecimalRescaled(decimalAnnotation, decimalType)) { + return createColumnReader(field, valueDecoders::getRescaledLongDecimalDecoder, INT128_ADAPTER, memoryContext); } - case INT96 -> new TimestampColumnReader(field, timeZone); - case FLOAT -> new FloatColumnReader(field); - case DOUBLE -> new DoubleColumnReader(field); - case BINARY -> createDecimalColumnReader(field).orElse(new BinaryColumnReader(field)); - case FIXED_LEN_BYTE_ARRAY -> { - Optional decimalColumnReader = createDecimalColumnReader(field); - if (decimalColumnReader.isPresent()) { - yield decimalColumnReader.get(); - } - if (isLogicalUuid(annotation)) { - yield new UuidColumnReader(field); - } - if (VARBINARY.equals(type) || VARCHAR.equals(type)) { - yield new BinaryColumnReader(field); - } - if (annotation == null) { - // Iceberg 0.11.1 writes UUID as FIXED_LEN_BYTE_ARRAY without logical type annotation (see https://github.com/apache/iceberg/pull/2913) - // To support such files, we bet on the type to be UUID, which gets verified later, when reading the column data. - yield new UuidColumnReader(field); - } - throw unsupportedException(type, field); + return createColumnReader(field, valueDecoders::getLongDecimalDecoder, INT128_ADAPTER, memoryContext); + } + if (type instanceof VarcharType varcharType && !varcharType.isUnbounded() && primitiveType == BINARY) { + return createColumnReader(field, valueDecoders::getBoundedVarcharBinaryDecoder, BINARY_ADAPTER, memoryContext); + } + if (type instanceof CharType && primitiveType == BINARY) { + return createColumnReader(field, valueDecoders::getCharBinaryDecoder, BINARY_ADAPTER, memoryContext); + } + if (type instanceof AbstractVariableWidthType && primitiveType == BINARY) { + return createColumnReader(field, valueDecoders::getBinaryDecoder, BINARY_ADAPTER, memoryContext); + } + if ((VARBINARY.equals(type) || VARCHAR.equals(type)) && primitiveType == FIXED_LEN_BYTE_ARRAY) { + return createColumnReader(field, valueDecoders::getFixedWidthBinaryDecoder, BINARY_ADAPTER, memoryContext); + } + if (UUID.equals(type) && primitiveType == FIXED_LEN_BYTE_ARRAY) { + // Iceberg 0.11.1 writes UUID as FIXED_LEN_BYTE_ARRAY without logical type annotation (see https://github.com/apache/iceberg/pull/2913) + // To support such files, we bet on the logical type to be UUID based on the Trino UUID type check. + if (annotation == null || isLogicalUuid(annotation)) { + return createColumnReader(field, valueDecoders::getUuidDecoder, INT128_ADAPTER, memoryContext); } - }; + } + throw unsupportedException(type, field); } private static ColumnReader createColumnReader( @@ -369,14 +301,6 @@ private static ColumnReader createColumnReader( memoryContext); } - private boolean useBatchedColumnReaders(PrimitiveField field) - { - if (isFlatColumn(field)) { - return useBatchColumnReaders; - } - return useBatchColumnReaders && useBatchNestedColumnReaders; - } - private static boolean isFlatColumn(PrimitiveField field) { return field.getDescriptor().getPath().length == 1; @@ -396,12 +320,6 @@ public Optional visit(UUIDLogicalTypeAnnotation uuidLogicalType) .orElse(FALSE); } - private static Optional createDecimalColumnReader(PrimitiveField field) - { - return createDecimalType(field) - .map(decimalType -> DecimalColumnReaderFactory.createReader(field, decimalType)); - } - private static boolean isDecimalRescaled(DecimalLogicalTypeAnnotation decimalAnnotation, DecimalType trinoType) { return decimalAnnotation.getPrecision() != trinoType.getPrecision() diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/DecimalColumnReaderFactory.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/DecimalColumnReaderFactory.java deleted file mode 100644 index d16c896f7517..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/DecimalColumnReaderFactory.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.type.DecimalType; - -public final class DecimalColumnReaderFactory -{ - private DecimalColumnReaderFactory() {} - - public static PrimitiveColumnReader createReader(PrimitiveField field, DecimalType parquetDecimalType) - { - if (parquetDecimalType.isShort()) { - return new ShortDecimalColumnReader(field, parquetDecimalType); - } - return new LongDecimalColumnReader(field, parquetDecimalType); - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/DoubleColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/DoubleColumnReader.java deleted file mode 100644 index d807ad90e000..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/DoubleColumnReader.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.Type; - -public class DoubleColumnReader - extends PrimitiveColumnReader -{ - public DoubleColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - type.writeDouble(blockBuilder, valuesReader.readDouble()); - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/FloatColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/FloatColumnReader.java deleted file mode 100644 index e493087da027..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/FloatColumnReader.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.Type; - -import static io.trino.spi.type.DoubleType.DOUBLE; -import static io.trino.spi.type.RealType.REAL; -import static java.lang.Float.floatToRawIntBits; - -public class FloatColumnReader - extends PrimitiveColumnReader -{ - public FloatColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - if (type == REAL) { - type.writeLong(blockBuilder, floatToRawIntBits(valuesReader.readFloat())); - } - else if (type == DOUBLE) { - type.writeDouble(blockBuilder, valuesReader.readFloat()); - } - else { - throw new VerifyError("Unsupported type " + type); - } - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/Int32ShortDecimalColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/Int32ShortDecimalColumnReader.java deleted file mode 100644 index cfd67252662b..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/Int32ShortDecimalColumnReader.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.TrinoException; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.DecimalType; -import io.trino.spi.type.Type; - -import static io.trino.spi.StandardErrorCode.INVALID_CAST_ARGUMENT; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.type.Decimals.overflows; -import static java.lang.String.format; - -public class Int32ShortDecimalColumnReader - extends PrimitiveColumnReader -{ - public Int32ShortDecimalColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type trinoType) - { - if (trinoType instanceof DecimalType trinoDecimalType && trinoDecimalType.isShort()) { - long value = valuesReader.readInteger(); - if (overflows(value, trinoDecimalType.getPrecision())) { - throw new TrinoException(INVALID_CAST_ARGUMENT, format("Cannot read parquet INT32 value '%s' as DECIMAL(%s, %s)", value, trinoDecimalType.getPrecision(), trinoDecimalType.getScale())); - } - - trinoType.writeLong(blockBuilder, value); - } - else { - throw new TrinoException(NOT_SUPPORTED, format("Unsupported Trino column type (%s) for Parquet column (%s)", trinoType, field.getDescriptor())); - } - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/Int64TimestampMillisColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/Int64TimestampMillisColumnReader.java deleted file mode 100644 index 421702c8dce8..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/Int64TimestampMillisColumnReader.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.TrinoException; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.LongTimestamp; -import io.trino.spi.type.TimestampType; -import io.trino.spi.type.TimestampWithTimeZoneType; -import io.trino.spi.type.Type; - -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; -import static io.trino.spi.type.TimeZoneKey.UTC_KEY; -import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND; -import static java.lang.String.format; - -public class Int64TimestampMillisColumnReader - extends PrimitiveColumnReader -{ - public Int64TimestampMillisColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - long epochMillis = valuesReader.readLong(); - if (type instanceof TimestampWithTimeZoneType) { - type.writeLong(blockBuilder, packDateTimeWithZone(epochMillis, UTC_KEY)); - } - else if (type instanceof TimestampType) { - long epochMicros = epochMillis * MICROSECONDS_PER_MILLISECOND; - if (((TimestampType) type).isShort()) { - type.writeLong(blockBuilder, epochMicros); - } - else { - type.writeObject(blockBuilder, new LongTimestamp(epochMicros, 0)); - } - } - else if (type == BIGINT) { - type.writeLong(blockBuilder, epochMillis); - } - else { - throw new TrinoException(NOT_SUPPORTED, format("Unsupported Trino column type (%s) for Parquet column (%s)", type, field.getDescriptor())); - } - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/Int64TimestampNanosColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/Int64TimestampNanosColumnReader.java deleted file mode 100644 index b5a64fec78d3..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/Int64TimestampNanosColumnReader.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.TrinoException; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.LongTimestamp; -import io.trino.spi.type.Timestamps; -import io.trino.spi.type.Type; - -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.TimestampType.TIMESTAMP_MICROS; -import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; -import static io.trino.spi.type.TimestampType.TIMESTAMP_NANOS; -import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MICROSECOND; -import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_NANOSECOND; -import static java.lang.Math.floorDiv; -import static java.lang.Math.floorMod; -import static java.lang.String.format; - -public class Int64TimestampNanosColumnReader - extends PrimitiveColumnReader -{ - public Int64TimestampNanosColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - long epochNanos = valuesReader.readLong(); - // TODO: specialize the class at creation time - if (type == TIMESTAMP_MILLIS) { - type.writeLong(blockBuilder, Timestamps.round(epochNanos, 6) / NANOSECONDS_PER_MICROSECOND); - } - else if (type == TIMESTAMP_MICROS) { - type.writeLong(blockBuilder, Timestamps.round(epochNanos, 3) / NANOSECONDS_PER_MICROSECOND); - } - else if (type == TIMESTAMP_NANOS) { - type.writeObject(blockBuilder, new LongTimestamp( - floorDiv(epochNanos, NANOSECONDS_PER_MICROSECOND), - floorMod(epochNanos, NANOSECONDS_PER_MICROSECOND) * PICOSECONDS_PER_NANOSECOND)); - } - else if (type == BIGINT) { - type.writeLong(blockBuilder, epochNanos); - } - else { - throw new TrinoException(NOT_SUPPORTED, format("Unsupported Trino column type (%s) for Parquet column (%s)", type, field.getDescriptor())); - } - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/IntColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/IntColumnReader.java deleted file mode 100644 index f130fa2f0654..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/IntColumnReader.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.Type; - -public class IntColumnReader - extends PrimitiveColumnReader -{ - public IntColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - type.writeLong(blockBuilder, valuesReader.readInteger()); - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelNullReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelNullReader.java deleted file mode 100644 index 06b66d7a997a..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelNullReader.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -public class LevelNullReader - implements LevelReader -{ - @Override - public int readLevel() - { - return 0; - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelRLEReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelRLEReader.java deleted file mode 100644 index 05fe478dcf61..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelRLEReader.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder; -import org.apache.parquet.io.ParquetDecodingException; - -import java.io.IOException; - -public class LevelRLEReader - implements LevelReader -{ - private final RunLengthBitPackingHybridDecoder delegate; - - public LevelRLEReader(RunLengthBitPackingHybridDecoder delegate) - { - this.delegate = delegate; - } - - @Override - public int readLevel() - { - try { - return delegate.readInt(); - } - catch (IOException e) { - throw new ParquetDecodingException(e); - } - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelReader.java deleted file mode 100644 index ef9c019c45b4..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelReader.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -public interface LevelReader -{ - int readLevel(); -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelValuesReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelValuesReader.java deleted file mode 100644 index 86edfb925f87..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LevelValuesReader.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import org.apache.parquet.column.values.ValuesReader; - -public class LevelValuesReader - implements LevelReader -{ - private final ValuesReader delegate; - - public LevelValuesReader(ValuesReader delegate) - { - this.delegate = delegate; - } - - @Override - public int readLevel() - { - return delegate.readInteger(); - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LongColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LongColumnReader.java deleted file mode 100644 index 027814c7162d..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LongColumnReader.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.Type; - -public class LongColumnReader - extends PrimitiveColumnReader -{ - public LongColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - type.writeLong(blockBuilder, valuesReader.readLong()); - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LongDecimalColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LongDecimalColumnReader.java deleted file mode 100644 index d11fd057d01a..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/LongDecimalColumnReader.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.DecimalType; -import io.trino.spi.type.Int128; -import io.trino.spi.type.Type; -import org.apache.parquet.io.ParquetDecodingException; -import org.apache.parquet.io.api.Binary; - -import static io.trino.spi.type.DecimalConversions.longToLongCast; -import static io.trino.spi.type.DecimalConversions.longToShortCast; -import static java.lang.String.format; -import static java.util.Objects.requireNonNull; - -public class LongDecimalColumnReader - extends PrimitiveColumnReader -{ - private final DecimalType parquetDecimalType; - - LongDecimalColumnReader(PrimitiveField field, DecimalType parquetDecimalType) - { - super(field); - this.parquetDecimalType = requireNonNull(parquetDecimalType, "parquetDecimalType is null"); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type trinoType) - { - if (!(trinoType instanceof DecimalType trinoDecimalType)) { - throw new ParquetDecodingException(format("Unsupported Trino column type (%s) for Parquet column (%s)", trinoType, field.getDescriptor())); - } - - Binary binary = valuesReader.readBytes(); - Int128 value = Int128.fromBigEndian(binary.getBytes()); - - if (trinoDecimalType.isShort()) { - trinoType.writeLong(blockBuilder, longToShortCast( - value, - parquetDecimalType.getPrecision(), - parquetDecimalType.getScale(), - trinoDecimalType.getPrecision(), - trinoDecimalType.getScale())); - } - else { - trinoType.writeObject(blockBuilder, longToLongCast( - value, - parquetDecimalType.getPrecision(), - parquetDecimalType.getScale(), - trinoDecimalType.getPrecision(), - trinoDecimalType.getScale())); - } - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java index d8c25025e0e3..8f3a3e78e518 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java @@ -167,7 +167,7 @@ public ParquetReader( this.blocks = requireNonNull(blocks, "blocks is null"); this.firstRowsOfBlocks = requireNonNull(firstRowsOfBlocks, "firstRowsOfBlocks is null"); this.dataSource = requireNonNull(dataSource, "dataSource is null"); - this.columnReaderFactory = new ColumnReaderFactory(timeZone, options); + this.columnReaderFactory = new ColumnReaderFactory(timeZone); this.memoryContext = requireNonNull(memoryContext, "memoryContext is null"); this.currentRowGroupMemoryContext = memoryContext.newAggregatedMemoryContext(); this.options = requireNonNull(options, "options is null"); diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java deleted file mode 100644 index 4695f140e1f2..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.airlift.slice.Slice; -import io.trino.parquet.DataPage; -import io.trino.parquet.DataPageV1; -import io.trino.parquet.DataPageV2; -import io.trino.parquet.DictionaryPage; -import io.trino.parquet.ParquetEncoding; -import io.trino.parquet.ParquetTypeUtils; -import io.trino.parquet.PrimitiveField; -import io.trino.parquet.dictionary.Dictionary; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.Type; -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntList; -import jakarta.annotation.Nullable; -import org.apache.parquet.bytes.ByteBufferInputStream; -import org.apache.parquet.bytes.BytesUtils; -import org.apache.parquet.column.values.ValuesReader; -import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder; -import org.apache.parquet.io.ParquetDecodingException; - -import java.io.IOException; -import java.util.Optional; -import java.util.OptionalLong; -import java.util.PrimitiveIterator; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Verify.verify; -import static io.trino.parquet.ParquetReaderUtils.toInputStream; -import static io.trino.parquet.ValuesType.DEFINITION_LEVEL; -import static io.trino.parquet.ValuesType.REPETITION_LEVEL; -import static io.trino.parquet.ValuesType.VALUES; -import static java.util.Objects.requireNonNull; - -public abstract class PrimitiveColumnReader - implements ColumnReader -{ - private static final int EMPTY_LEVEL_VALUE = -1; - protected final PrimitiveField field; - - protected int definitionLevel = EMPTY_LEVEL_VALUE; - protected int repetitionLevel = EMPTY_LEVEL_VALUE; - protected ValuesReader valuesReader; - - private int nextBatchSize; - private LevelReader repetitionReader; - private LevelReader definitionReader; - private PageReader pageReader; - private Dictionary dictionary; - private DataPage page; - private int remainingValueCountInPage; - private int readOffset; - @Nullable - private PrimitiveIterator.OfLong indexIterator; - private long currentRow; - private long targetRow; - - protected abstract void readValue(BlockBuilder blockBuilder, Type type); - - private void skipSingleValue() - { - if (definitionLevel == field.getDescriptor().getMaxDefinitionLevel()) { - valuesReader.skip(); - } - } - - protected boolean isValueNull() - { - return ParquetTypeUtils.isValueNull(field.isRequired(), definitionLevel, field.getDefinitionLevel()); - } - - public PrimitiveColumnReader(PrimitiveField field) - { - this.field = requireNonNull(field, "columnDescriptor"); - pageReader = null; - this.targetRow = 0; - this.indexIterator = null; - } - - @Override - public boolean hasPageReader() - { - return pageReader != null; - } - - @Override - public void setPageReader(PageReader pageReader, Optional rowRanges) - { - this.pageReader = requireNonNull(pageReader, "pageReader"); - DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); - - if (dictionaryPage != null) { - try { - dictionary = dictionaryPage.getEncoding().initDictionary(field.getDescriptor(), dictionaryPage); - } - catch (IOException e) { - throw new ParquetDecodingException("could not decode the dictionary for " + field.getDescriptor(), e); - } - } - else { - dictionary = null; - } - if (rowRanges.isPresent()) { - indexIterator = rowRanges.get().getParquetRowRanges().iterator(); - // If rowRanges is empty for a row-group, then no page needs to be read, and we should not reach here - checkArgument(indexIterator.hasNext(), "rowRanges is empty"); - targetRow = indexIterator.next(); - } - } - - @Override - public void prepareNextRead(int batchSize) - { - readOffset = readOffset + nextBatchSize; - nextBatchSize = batchSize; - } - - @Override - public ColumnChunk readPrimitive() - { - // Pre-allocate these arrays to the necessary size. This saves a substantial amount of - // CPU time by avoiding container resizing. - IntList definitionLevels = new IntArrayList(nextBatchSize); - IntList repetitionLevels = new IntArrayList(nextBatchSize); - seek(); - BlockBuilder blockBuilder = field.getType().createBlockBuilder(null, nextBatchSize); - int valueCount = 0; - while (valueCount < nextBatchSize) { - if (page == null) { - readNextPage(); - } - int valuesToRead = Math.min(remainingValueCountInPage, nextBatchSize - valueCount); - if (valuesToRead == 0) { - // When we break here, we could end up with valueCount < nextBatchSize, but that is OK. - break; - } - readValues(blockBuilder, valuesToRead, field.getType(), definitionLevels, repetitionLevels); - valueCount += valuesToRead; - } - - readOffset = 0; - nextBatchSize = 0; - return new ColumnChunk(blockBuilder.build(), definitionLevels.toIntArray(), repetitionLevels.toIntArray()); - } - - private void readValues(BlockBuilder blockBuilder, int valuesToRead, Type type, IntList definitionLevels, IntList repetitionLevels) - { - processValues(valuesToRead, () -> { - if (definitionLevel == field.getDefinitionLevel()) { - readValue(blockBuilder, type); - } - else if (isValueNull()) { - blockBuilder.appendNull(); - } - definitionLevels.add(definitionLevel); - repetitionLevels.add(repetitionLevel); - }); - } - - private void skipValues(long valuesToRead) - { - processValues(valuesToRead, this::skipSingleValue); - } - - /** - * When filtering using column indexes we might skip reading some pages for different columns. Because the rows are - * not aligned between the pages of the different columns it might be required to skip some values. The values (and the - * related rl and dl) are skipped based on the iterator of the required row indexes and the first row index of each - * page. - * For example: - * - *
-     * rows   col1   col2   col3
-     *      ┌──────┬──────┬──────┐
-     *   0  │  p0  │      │      │
-     *      ╞══════╡  p0  │  p0  │
-     *  20  │ p1(X)│------│------│
-     *      ╞══════╪══════╡      │
-     *  40  │ p2(X)│      │------│
-     *      ╞══════╡ p1(X)╞══════╡
-     *  60  │ p3(X)│      │------│
-     *      ╞══════╪══════╡      │
-     *  80  │  p4  │      │  p1  │
-     *      ╞══════╡  p2  │      │
-     * 100  │  p5  │      │      │
-     *      └──────┴──────┴──────┘
-     * 
- *

- * The pages 1, 2, 3 in col1 are skipped so we have to skip the rows [20, 79]. Because page 1 in col2 contains values - * only for the rows [40, 79] we skip this entire page as well. To synchronize the row reading we have to skip the - * values (and the related rl and dl) for the rows [20, 39] in the end of the page 0 for col2. Similarly, we have to - * skip values while reading page0 and page1 for col3. - */ - private void processValues(long valuesToRead, Runnable valueReader) - { - if (definitionLevel == EMPTY_LEVEL_VALUE && repetitionLevel == EMPTY_LEVEL_VALUE) { - definitionLevel = definitionReader.readLevel(); - repetitionLevel = repetitionReader.readLevel(); - } - int valueCount = 0; - int skipCount = 0; - for (int i = 0; i < valuesToRead; ) { - boolean consumed; - do { - if (incrementRowAndTestIfTargetReached(repetitionLevel)) { - valueReader.run(); - valueCount++; - consumed = true; - } - else { - skipSingleValue(); - skipCount++; - consumed = false; - } - - if (valueCount + skipCount == remainingValueCountInPage) { - updateValueCounts(valueCount, skipCount); - if (!readNextPage()) { - return; - } - valueCount = 0; - skipCount = 0; - } - - repetitionLevel = repetitionReader.readLevel(); - definitionLevel = definitionReader.readLevel(); - } - while (repetitionLevel != 0); - - if (consumed) { - i++; - } - } - updateValueCounts(valueCount, skipCount); - } - - private void seek() - { - if (readOffset == 0) { - return; - } - int readOffset = this.readOffset; - int valuePosition = 0; - while (valuePosition < readOffset) { - if (page == null) { - if (!readNextPage()) { - break; - } - } - int offset = Math.min(remainingValueCountInPage, readOffset - valuePosition); - skipValues(offset); - valuePosition = valuePosition + offset; - } - checkArgument(valuePosition == readOffset, "valuePosition %s must be equal to readOffset %s", valuePosition, readOffset); - } - - private boolean readNextPage() - { - verify(page == null, "readNextPage has to be called when page is null"); - page = pageReader.readPage(); - if (page == null) { - // we have read all pages - return false; - } - remainingValueCountInPage = page.getValueCount(); - if (page instanceof DataPageV1) { - valuesReader = readPageV1((DataPageV1) page); - } - else { - valuesReader = readPageV2((DataPageV2) page); - } - return true; - } - - private void updateValueCounts(int valuesRead, int skipCount) - { - int totalCount = valuesRead + skipCount; - if (totalCount == remainingValueCountInPage) { - page = null; - valuesReader = null; - } - remainingValueCountInPage -= totalCount; - } - - private ValuesReader readPageV1(DataPageV1 page) - { - ValuesReader rlReader = page.getRepetitionLevelEncoding().getValuesReader(field.getDescriptor(), REPETITION_LEVEL); - ValuesReader dlReader = page.getDefinitionLevelEncoding().getValuesReader(field.getDescriptor(), DEFINITION_LEVEL); - repetitionReader = new LevelValuesReader(rlReader); - definitionReader = new LevelValuesReader(dlReader); - try { - ByteBufferInputStream in = toInputStream(page.getSlice()); - rlReader.initFromPage(page.getValueCount(), in); - dlReader.initFromPage(page.getValueCount(), in); - return initDataReader(page.getValueEncoding(), page.getValueCount(), in, page.getFirstRowIndex()); - } - catch (IOException e) { - throw new ParquetDecodingException("Error reading parquet page " + page + " in column " + field.getDescriptor(), e); - } - } - - private ValuesReader readPageV2(DataPageV2 page) - { - repetitionReader = buildLevelRLEReader(field.getDescriptor().getMaxRepetitionLevel(), page.getRepetitionLevels()); - definitionReader = buildLevelRLEReader(field.getDescriptor().getMaxDefinitionLevel(), page.getDefinitionLevels()); - return initDataReader(page.getDataEncoding(), page.getValueCount(), toInputStream(page.getSlice()), page.getFirstRowIndex()); - } - - private LevelReader buildLevelRLEReader(int maxLevel, Slice slice) - { - if (maxLevel == 0) { - return new LevelNullReader(); - } - return new LevelRLEReader(new RunLengthBitPackingHybridDecoder(BytesUtils.getWidthFromMaxInt(maxLevel), slice.getInput())); - } - - private ValuesReader initDataReader(ParquetEncoding dataEncoding, int valueCount, ByteBufferInputStream in, OptionalLong firstRowIndex) - { - ValuesReader valuesReader; - if (dataEncoding.usesDictionary()) { - if (dictionary == null) { - throw new ParquetDecodingException("Dictionary is missing for Page"); - } - valuesReader = dataEncoding.getDictionaryBasedValuesReader(field.getDescriptor(), VALUES, dictionary); - } - else { - valuesReader = dataEncoding.getValuesReader(field.getDescriptor(), VALUES); - } - - try { - valuesReader.initFromPage(valueCount, in); - if (firstRowIndex.isPresent()) { - currentRow = firstRowIndex.getAsLong(); - } - return valuesReader; - } - catch (IOException e) { - throw new ParquetDecodingException("Error reading parquet page in column " + field.getDescriptor(), e); - } - } - - // Increment currentRow and return true if at or after targetRow - private boolean incrementRowAndTestIfTargetReached(int repetitionLevel) - { - if (indexIterator == null) { - return true; - } - - if (repetitionLevel == 0) { - if (currentRow > targetRow) { - targetRow = indexIterator.hasNext() ? indexIterator.next() : Long.MAX_VALUE; - } - boolean isAtTargetRow = currentRow == targetRow; - currentRow++; - return isAtTargetRow; - } - - // currentRow was incremented at repetitionLevel 0 - return currentRow - 1 == targetRow; - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ShortDecimalColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ShortDecimalColumnReader.java deleted file mode 100644 index 7950ca65016d..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ShortDecimalColumnReader.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.TrinoException; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.DecimalType; -import io.trino.spi.type.Type; -import org.apache.parquet.io.ParquetDecodingException; - -import static com.google.common.base.Preconditions.checkArgument; -import static io.trino.parquet.ParquetTypeUtils.checkBytesFitInShortDecimal; -import static io.trino.parquet.ParquetTypeUtils.getShortDecimalValue; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.DecimalConversions.shortToLongCast; -import static io.trino.spi.type.DecimalConversions.shortToShortCast; -import static io.trino.spi.type.Decimals.longTenToNth; -import static io.trino.spi.type.IntegerType.INTEGER; -import static io.trino.spi.type.SmallintType.SMALLINT; -import static io.trino.spi.type.TinyintType.TINYINT; -import static java.lang.String.format; -import static java.util.Objects.requireNonNull; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; - -public class ShortDecimalColumnReader - extends PrimitiveColumnReader -{ - private final DecimalType parquetDecimalType; - - ShortDecimalColumnReader(PrimitiveField field, DecimalType parquetDecimalType) - { - super(field); - this.parquetDecimalType = requireNonNull(parquetDecimalType, "parquetDecimalType is null"); - int typeLength = field.getDescriptor().getPrimitiveType().getTypeLength(); - checkArgument(typeLength <= 16, "Type length %s should be <= 16 for short decimal column %s", typeLength, field.getDescriptor()); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type trinoType) - { - if (!((trinoType instanceof DecimalType) || isIntegerType(trinoType))) { - throw new ParquetDecodingException(format("Unsupported Trino column type (%s) for Parquet column (%s)", trinoType, field.getDescriptor())); - } - - long value; - - // When decimals are encoded with primitive types Parquet stores unscaled values - if (field.getDescriptor().getPrimitiveType().getPrimitiveTypeName() == INT32) { - value = valuesReader.readInteger(); - } - else if (field.getDescriptor().getPrimitiveType().getPrimitiveTypeName() == INT64) { - value = valuesReader.readLong(); - } - else { - byte[] bytes = valuesReader.readBytes().getBytes(); - if (bytes.length <= Long.BYTES) { - value = getShortDecimalValue(bytes); - } - else { - int startOffset = bytes.length - Long.BYTES; - checkBytesFitInShortDecimal(bytes, 0, startOffset, field.getDescriptor()); - value = getShortDecimalValue(bytes, startOffset, Long.BYTES); - } - } - - if (trinoType instanceof DecimalType trinoDecimalType) { - if (trinoDecimalType.isShort()) { - long rescale = longTenToNth(Math.abs(trinoDecimalType.getScale() - parquetDecimalType.getScale())); - long convertedValue = shortToShortCast( - value, - parquetDecimalType.getPrecision(), - parquetDecimalType.getScale(), - trinoDecimalType.getPrecision(), - trinoDecimalType.getScale(), - rescale, - rescale / 2); - - trinoType.writeLong(blockBuilder, convertedValue); - } - else { - trinoType.writeObject(blockBuilder, shortToLongCast( - value, - parquetDecimalType.getPrecision(), - parquetDecimalType.getScale(), - trinoDecimalType.getPrecision(), - trinoDecimalType.getScale())); - } - } - else { - if (parquetDecimalType.getScale() != 0) { - throw new TrinoException(NOT_SUPPORTED, format("Unsupported Trino column type (%s) for Parquet column (%s)", trinoType, field.getDescriptor())); - } - - if (!isInValidNumberRange(trinoType, value)) { - throw new TrinoException(NOT_SUPPORTED, format("Could not coerce from %s to %s: %s", parquetDecimalType, trinoType, value)); - } - trinoType.writeLong(blockBuilder, value); - } - } - - protected boolean isIntegerType(Type type) - { - return type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER) || type.equals(BIGINT); - } - - protected boolean isInValidNumberRange(Type type, long value) - { - if (type.equals(TINYINT)) { - return Byte.MIN_VALUE <= value && value <= Byte.MAX_VALUE; - } - if (type.equals(SMALLINT)) { - return Short.MIN_VALUE <= value && value <= Short.MAX_VALUE; - } - if (type.equals(INTEGER)) { - return Integer.MIN_VALUE <= value && value <= Integer.MAX_VALUE; - } - if (type.equals(BIGINT)) { - return true; - } - - throw new IllegalArgumentException("Unsupported type: " + type); - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TimeMicrosColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TimeMicrosColumnReader.java deleted file mode 100644 index ba1e7138856d..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TimeMicrosColumnReader.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.TrinoException; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.TimeType; -import io.trino.spi.type.Timestamps; -import io.trino.spi.type.Type; - -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static java.lang.String.format; - -public class TimeMicrosColumnReader - extends PrimitiveColumnReader -{ - public TimeMicrosColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - long picos = valuesReader.readLong() * Timestamps.PICOSECONDS_PER_MICROSECOND; - if (type instanceof TimeType) { - type.writeLong(blockBuilder, picos); - } - else { - throw new TrinoException(NOT_SUPPORTED, format("Unsupported Trino column type (%s) for Parquet column (%s)", type, field.getDescriptor())); - } - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TimestampColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TimestampColumnReader.java deleted file mode 100644 index cf236d294396..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TimestampColumnReader.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.plugin.base.type.DecodedTimestamp; -import io.trino.plugin.base.type.TrinoTimestampEncoder; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.TimestampType; -import io.trino.spi.type.TimestampWithTimeZoneType; -import io.trino.spi.type.Type; -import org.joda.time.DateTimeZone; - -import static io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp; -import static io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder; -import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; -import static io.trino.spi.type.TimeZoneKey.UTC_KEY; -import static io.trino.spi.type.Timestamps.MILLISECONDS_PER_SECOND; -import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MILLISECOND; -import static java.util.Objects.requireNonNull; - -public class TimestampColumnReader - extends PrimitiveColumnReader -{ - private final DateTimeZone timeZone; - - public TimestampColumnReader(PrimitiveField field, DateTimeZone timeZone) - { - super(field); - this.timeZone = requireNonNull(timeZone, "timeZone is null"); - } - - // TODO: refactor to provide type at construction time (https://github.com/trinodb/trino/issues/5198) - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - if (type instanceof TimestampWithTimeZoneType) { - DecodedTimestamp decodedTimestamp = decodeInt96Timestamp(valuesReader.readBytes()); - long utcMillis = decodedTimestamp.epochSeconds() * MILLISECONDS_PER_SECOND + decodedTimestamp.nanosOfSecond() / NANOSECONDS_PER_MILLISECOND; - type.writeLong(blockBuilder, packDateTimeWithZone(utcMillis, UTC_KEY)); - } - else { - TrinoTimestampEncoder trinoTimestampEncoder = createTimestampEncoder((TimestampType) type, timeZone); - trinoTimestampEncoder.write(decodeInt96Timestamp(valuesReader.readBytes()), blockBuilder); - } - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TimestampMicrosColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TimestampMicrosColumnReader.java deleted file mode 100644 index 6aafffc18397..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TimestampMicrosColumnReader.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.PrimitiveField; -import io.trino.spi.TrinoException; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.LongTimestamp; -import io.trino.spi.type.LongTimestampWithTimeZone; -import io.trino.spi.type.Timestamps; -import io.trino.spi.type.Type; - -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; -import static io.trino.spi.type.TimeZoneKey.UTC_KEY; -import static io.trino.spi.type.TimestampType.TIMESTAMP_MICROS; -import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; -import static io.trino.spi.type.TimestampType.TIMESTAMP_NANOS; -import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MICROS; -import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS; -import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_NANOS; -import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND; -import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MICROSECOND; -import static java.lang.Math.floorDiv; -import static java.lang.Math.floorMod; -import static java.lang.Math.toIntExact; -import static java.lang.String.format; - -public class TimestampMicrosColumnReader - extends PrimitiveColumnReader -{ - public TimestampMicrosColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type type) - { - long epochMicros = valuesReader.readLong(); - // TODO: specialize the class at creation time - if (type == TIMESTAMP_MILLIS) { - type.writeLong(blockBuilder, Timestamps.round(epochMicros, 3)); - } - else if (type == TIMESTAMP_MICROS) { - type.writeLong(blockBuilder, epochMicros); - } - else if (type == TIMESTAMP_NANOS) { - type.writeObject(blockBuilder, new LongTimestamp(epochMicros, 0)); - } - else if (type == TIMESTAMP_TZ_MILLIS) { - long epochMillis = Timestamps.round(epochMicros, 3) / MICROSECONDS_PER_MILLISECOND; - type.writeLong(blockBuilder, packDateTimeWithZone(epochMillis, UTC_KEY)); - } - else if (type == TIMESTAMP_TZ_MICROS || type == TIMESTAMP_TZ_NANOS) { - long epochMillis = floorDiv(epochMicros, MICROSECONDS_PER_MILLISECOND); - int picosOfMillis = toIntExact(floorMod(epochMicros, MICROSECONDS_PER_MILLISECOND)) * PICOSECONDS_PER_MICROSECOND; - type.writeObject(blockBuilder, LongTimestampWithTimeZone.fromEpochMillisAndFraction(epochMillis, picosOfMillis, UTC_KEY)); - } - else if (type == BIGINT) { - type.writeLong(blockBuilder, epochMicros); - } - else { - throw new TrinoException(NOT_SUPPORTED, format("Unsupported Trino column type (%s) for Parquet column (%s)", type, field.getDescriptor())); - } - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/UuidColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/UuidColumnReader.java deleted file mode 100644 index c3f77249d927..000000000000 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/UuidColumnReader.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.airlift.slice.Slice; -import io.trino.parquet.PrimitiveField; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.type.Type; -import org.apache.parquet.io.api.Binary; - -import static com.google.common.base.Preconditions.checkArgument; -import static io.airlift.slice.Slices.wrappedBuffer; -import static io.trino.spi.type.UuidType.UUID; - -public class UuidColumnReader - extends PrimitiveColumnReader -{ - public UuidColumnReader(PrimitiveField field) - { - super(field); - } - - @Override - protected void readValue(BlockBuilder blockBuilder, Type trinoType) - { - checkArgument(trinoType == UUID, "Unsupported type: %s", trinoType); - - Binary binary = valuesReader.readBytes(); - Slice slice = wrappedBuffer(binary.getBytes()); - trinoType.writeSlice(blockBuilder, slice); - } -} diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java index 38d5170e65c6..15bcc99d7705 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java @@ -83,7 +83,6 @@ public class ParquetWriter private final int chunkMaxLogicalBytes; private final Map, Type> primitiveTypes; private final CompressionCodec compressionCodec; - private final boolean useBatchColumnReadersForVerification; private final Optional parquetTimeZone; private final ImmutableList.Builder rowGroupBuilder = ImmutableList.builder(); @@ -104,7 +103,6 @@ public ParquetWriter( ParquetWriterOptions writerOption, CompressionCodec compressionCodec, String trinoVersion, - boolean useBatchColumnReadersForVerification, Optional parquetTimeZone, Optional validationBuilder) { @@ -114,7 +112,6 @@ public ParquetWriter( this.primitiveTypes = requireNonNull(primitiveTypes, "primitiveTypes is null"); this.writerOption = requireNonNull(writerOption, "writerOption is null"); this.compressionCodec = requireNonNull(compressionCodec, "compressionCodec is null"); - this.useBatchColumnReadersForVerification = useBatchColumnReadersForVerification; this.parquetTimeZone = requireNonNull(parquetTimeZone, "parquetTimeZone is null"); this.createdBy = formatCreatedBy(requireNonNull(trinoVersion, "trinoVersion is null")); @@ -258,7 +255,7 @@ private ParquetReader createParquetReader(ParquetDataSource input, ParquetMetada input, parquetTimeZone.orElseThrow(), newSimpleAggregatedMemoryContext(), - new ParquetReaderOptions().withBatchColumnReaders(useBatchColumnReadersForVerification), + new ParquetReaderOptions(), exception -> { throwIfUnchecked(exception); return new RuntimeException(exception); diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/ParquetTestUtils.java b/lib/trino-parquet/src/test/java/io/trino/parquet/ParquetTestUtils.java index 06c9c79e5d30..8c2ebe947375 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/ParquetTestUtils.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/ParquetTestUtils.java @@ -90,7 +90,6 @@ public static ParquetWriter createParquetWriter(OutputStream outputStream, Parqu writerOptions, CompressionCodec.SNAPPY, "test-version", - false, Optional.of(DateTimeZone.getDefault()), Optional.empty()); } diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/AbstractColumnReaderBenchmark.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/AbstractColumnReaderBenchmark.java index 44325171784f..3fbfa3f18161 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/AbstractColumnReaderBenchmark.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/AbstractColumnReaderBenchmark.java @@ -17,7 +17,6 @@ import io.airlift.slice.Slices; import io.trino.parquet.DataPage; import io.trino.parquet.DataPageV1; -import io.trino.parquet.ParquetReaderOptions; import io.trino.parquet.PrimitiveField; import org.apache.parquet.column.values.ValuesWriter; import org.openjdk.jmh.annotations.Benchmark; @@ -61,7 +60,7 @@ public abstract class AbstractColumnReaderBenchmark private static final int DATA_GENERATION_BATCH_SIZE = 16384; private static final int READ_BATCH_SIZE = 4096; - private final ColumnReaderFactory columnReaderFactory = new ColumnReaderFactory(UTC, new ParquetReaderOptions().withBatchColumnReaders(true)); + private final ColumnReaderFactory columnReaderFactory = new ColumnReaderFactory(UTC); private final List dataPages = new ArrayList<>(); private int dataPositions; diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/AbstractColumnReaderTest.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/AbstractColumnReaderTest.java index 0a6a09c3bac6..49dee968e07b 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/AbstractColumnReaderTest.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/AbstractColumnReaderTest.java @@ -24,7 +24,6 @@ import io.trino.parquet.DictionaryPage; import io.trino.parquet.Page; import io.trino.parquet.ParquetEncoding; -import io.trino.parquet.ParquetReaderOptions; import io.trino.parquet.PrimitiveField; import io.trino.parquet.reader.TestingColumnReader.ColumnReaderFormat; import io.trino.parquet.reader.TestingColumnReader.DataPageVersion; @@ -538,7 +537,7 @@ public void testMemoryUsage(DataPageVersion version, ColumnReaderFormat f // Create reader PrimitiveField field = createField(format, true); AggregatedMemoryContext memoryContext = newSimpleAggregatedMemoryContext(); - ColumnReaderFactory columnReaderFactory = new ColumnReaderFactory(UTC, new ParquetReaderOptions().withBatchColumnReaders(true)); + ColumnReaderFactory columnReaderFactory = new ColumnReaderFactory(UTC); ColumnReader reader = columnReaderFactory.create(field, memoryContext); // Write data DictionaryValuesWriter dictionaryWriter = format.getDictionaryWriter(); diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnReaderFactory.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnReaderFactory.java deleted file mode 100644 index b4749d526b4c..000000000000 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnReaderFactory.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.parquet.reader; - -import io.trino.parquet.ParquetReaderOptions; -import io.trino.parquet.PrimitiveField; -import io.trino.parquet.reader.flat.FlatColumnReader; -import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.schema.PrimitiveType; -import org.testng.annotations.Test; - -import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; -import static io.trino.spi.type.IntegerType.INTEGER; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; -import static org.apache.parquet.schema.Type.Repetition.OPTIONAL; -import static org.assertj.core.api.Assertions.assertThat; -import static org.joda.time.DateTimeZone.UTC; - -public class TestColumnReaderFactory -{ - @Test - public void testUseBatchedColumnReaders() - { - PrimitiveField field = new PrimitiveField( - INTEGER, - false, - new ColumnDescriptor(new String[] {"test"}, new PrimitiveType(OPTIONAL, INT32, "test"), 0, 1), - 0); - ColumnReaderFactory columnReaderFactory = new ColumnReaderFactory(UTC, new ParquetReaderOptions().withBatchColumnReaders(false)); - assertThat(columnReaderFactory.create(field, newSimpleAggregatedMemoryContext())) - .isNotInstanceOf(AbstractColumnReader.class); - columnReaderFactory = new ColumnReaderFactory(UTC, new ParquetReaderOptions().withBatchColumnReaders(true)); - assertThat(columnReaderFactory.create(field, newSimpleAggregatedMemoryContext())) - .isInstanceOf(FlatColumnReader.class); - } - - @Test - public void testNestedColumnReaders() - { - PrimitiveField field = new PrimitiveField( - INTEGER, - false, - new ColumnDescriptor(new String[] {"level1", "level2"}, new PrimitiveType(OPTIONAL, INT32, "test"), 1, 2), - 0); - ColumnReaderFactory columnReaderFactory = new ColumnReaderFactory(UTC, new ParquetReaderOptions().withBatchColumnReaders(false)); - assertThat(columnReaderFactory.create(field, newSimpleAggregatedMemoryContext())) - .isNotInstanceOf(AbstractColumnReader.class); - columnReaderFactory = new ColumnReaderFactory(UTC, new ParquetReaderOptions().withBatchColumnReaders(false).withBatchNestedColumnReaders(true)); - assertThat(columnReaderFactory.create(field, newSimpleAggregatedMemoryContext())) - .isNotInstanceOf(AbstractColumnReader.class); - - columnReaderFactory = new ColumnReaderFactory(UTC, new ParquetReaderOptions().withBatchColumnReaders(true)); - assertThat(columnReaderFactory.create(field, newSimpleAggregatedMemoryContext())) - .isInstanceOf(NestedColumnReader.class); - columnReaderFactory = new ColumnReaderFactory(UTC, new ParquetReaderOptions().withBatchColumnReaders(true).withBatchNestedColumnReaders(true)); - assertThat(columnReaderFactory.create(field, newSimpleAggregatedMemoryContext())) - .isInstanceOf(NestedColumnReader.class); - } -} diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestFlatColumnReaderRowRanges.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestFlatColumnReaderRowRanges.java index ba516ec37074..582558e617bb 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestFlatColumnReaderRowRanges.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestFlatColumnReaderRowRanges.java @@ -62,8 +62,6 @@ protected ColumnReaderProvider[] getColumnReaderProviders() private enum FlatColumnReaderProvider implements ColumnReaderProvider { - INT_PRIMITIVE_NO_NULLS(() -> new IntColumnReader(FIELD), FIELD), - INT_PRIMITIVE_NULLABLE(() -> new IntColumnReader(NULLABLE_FIELD), NULLABLE_FIELD), INT_FLAT_NO_NULLS(() -> createFlatColumnReader(FIELD), FIELD), INT_FLAT_NULLABLE(() -> createFlatColumnReader(NULLABLE_FIELD), NULLABLE_FIELD), /**/; diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestInt96Timestamp.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestInt96Timestamp.java index cd0e32f93a42..5939dd0253d1 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestInt96Timestamp.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestInt96Timestamp.java @@ -17,7 +17,6 @@ import io.airlift.slice.Slices; import io.trino.parquet.DataPage; import io.trino.parquet.DataPageV2; -import io.trino.parquet.ParquetReaderOptions; import io.trino.parquet.PrimitiveField; import io.trino.plugin.base.type.DecodedTimestamp; import io.trino.spi.block.Block; @@ -107,7 +106,7 @@ public void testVariousTimestamps(TimestampType type, BiFunction createNestedColumnReader(REPEATED_NULLABLE_FIELD), REPEATED_NULLABLE_FIELD), NESTED_READER_REPEATABLE_NESTED_NO_NULLS(() -> createNestedColumnReader(REPEATED_NESTED_FIELD), REPEATED_NESTED_FIELD), NESTED_READER_REPEATABLE_NESTED_NULLABLE(() -> createNestedColumnReader(REPEATED_NULLABLE_NESTED_FIELD), REPEATED_NULLABLE_NESTED_FIELD), - REPEATABLE_NESTED_NULLABLE(() -> new IntColumnReader(REPEATED_NULLABLE_NESTED_FIELD), REPEATED_NULLABLE_NESTED_FIELD), /**/; private final Supplier columnReader; diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/flat/TestFlatColumnReader.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/flat/TestFlatColumnReader.java index 06457048249e..1ef5d574c688 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/flat/TestFlatColumnReader.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/flat/TestFlatColumnReader.java @@ -18,7 +18,6 @@ import io.trino.parquet.DataPage; import io.trino.parquet.DataPageV1; import io.trino.parquet.ParquetEncoding; -import io.trino.parquet.ParquetReaderOptions; import io.trino.parquet.PrimitiveField; import io.trino.parquet.reader.AbstractColumnReaderTest; import io.trino.parquet.reader.ColumnReader; @@ -63,7 +62,7 @@ public class TestFlatColumnReader @Override protected ColumnReader createColumnReader(PrimitiveField field) { - ColumnReaderFactory columnReaderFactory = new ColumnReaderFactory(UTC, new ParquetReaderOptions().withBatchColumnReaders(true)); + ColumnReaderFactory columnReaderFactory = new ColumnReaderFactory(UTC); ColumnReader columnReader = columnReaderFactory.create(field, newSimpleAggregatedMemoryContext()); assertThat(columnReader).isInstanceOf(FlatColumnReader.class); return columnReader; diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/AbstractDeltaLakePageSink.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/AbstractDeltaLakePageSink.java index 8c75ea33ce11..7679ddc46167 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/AbstractDeltaLakePageSink.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/AbstractDeltaLakePageSink.java @@ -470,7 +470,6 @@ private FileWriter createParquetFileWriter(Location path) identityMapping, compressionCodec, trinoVersion, - false, Optional.empty(), Optional.empty()); } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMergeSink.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMergeSink.java index ec1b074bd40d..a6caa6cec349 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMergeSink.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMergeSink.java @@ -384,7 +384,6 @@ private FileWriter createParquetFileWriter(Location path, List parquetFieldIdToName = columnMappingMode == ColumnMappingMode.ID ? loadParquetIdAndNameMapping(inputFile, options) : ImmutableMap.of(); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java index d1ecc0e2cbcc..b6142b06dd39 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java @@ -54,8 +54,6 @@ public final class DeltaLakeSessionProperties private static final String PARQUET_MAX_READ_BLOCK_SIZE = "parquet_max_read_block_size"; private static final String PARQUET_MAX_READ_BLOCK_ROW_COUNT = "parquet_max_read_block_row_count"; private static final String PARQUET_USE_COLUMN_INDEX = "parquet_use_column_index"; - private static final String PARQUET_OPTIMIZED_READER_ENABLED = "parquet_optimized_reader_enabled"; - private static final String PARQUET_OPTIMIZED_NESTED_READER_ENABLED = "parquet_optimized_nested_reader_enabled"; private static final String PARQUET_WRITER_BLOCK_SIZE = "parquet_writer_block_size"; private static final String PARQUET_WRITER_PAGE_SIZE = "parquet_writer_page_size"; private static final String TARGET_MAX_FILE_SIZE = "target_max_file_size"; @@ -124,16 +122,6 @@ public DeltaLakeSessionProperties( "Use Parquet column index", parquetReaderConfig.isUseColumnIndex(), false), - booleanProperty( - PARQUET_OPTIMIZED_READER_ENABLED, - "Use optimized Parquet reader", - parquetReaderConfig.isOptimizedReaderEnabled(), - false), - booleanProperty( - PARQUET_OPTIMIZED_NESTED_READER_ENABLED, - "Use optimized Parquet reader for nested columns", - parquetReaderConfig.isOptimizedNestedReaderEnabled(), - false), dataSizeProperty( PARQUET_WRITER_BLOCK_SIZE, "Parquet: Writer block size", @@ -250,16 +238,6 @@ public static boolean isParquetUseColumnIndex(ConnectorSession session) return session.getProperty(PARQUET_USE_COLUMN_INDEX, Boolean.class); } - public static boolean isParquetOptimizedReaderEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_OPTIMIZED_READER_ENABLED, Boolean.class); - } - - public static boolean isParquetOptimizedNestedReaderEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_OPTIMIZED_NESTED_READER_ENABLED, Boolean.class); - } - public static DataSize getParquetWriterBlockSize(ConnectorSession session) { return session.getProperty(PARQUET_WRITER_BLOCK_SIZE, DataSize.class); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/functions/tablechanges/TableChangesFunctionProcessor.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/functions/tablechanges/TableChangesFunctionProcessor.java index de65c3bd5459..e69dd5a046c0 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/functions/tablechanges/TableChangesFunctionProcessor.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/functions/tablechanges/TableChangesFunctionProcessor.java @@ -46,8 +46,6 @@ import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetMaxReadBlockRowCount; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetMaxReadBlockSize; -import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isParquetOptimizedNestedReaderEnabled; -import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isParquetOptimizedReaderEnabled; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isParquetUseColumnIndex; import static io.trino.plugin.deltalake.functions.tablechanges.TableChangesFileType.CDF_FILE; import static io.trino.spi.function.table.TableFunctionProcessorState.Finished.FINISHED; @@ -176,9 +174,7 @@ private static DeltaLakePageSource createDeltaLakePageSource( parquetReaderOptions = parquetReaderOptions .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) - .withUseColumnIndex(isParquetUseColumnIndex(session)) - .withBatchColumnReaders(isParquetOptimizedReaderEnabled(session)) - .withBatchNestedColumnReaders(isParquetOptimizedNestedReaderEnabled(session)); + .withUseColumnIndex(isParquetUseColumnIndex(session)); List splitColumns = switch (split.fileType()) { case CDF_FILE -> ImmutableList.builder().addAll(handle.columns()) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java index 99c8a92b0eaf..2e6a5c819197 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java @@ -138,7 +138,6 @@ public void write(CheckpointEntries entries, TrinoOutputFile outputFile) parquetWriterOptions, CompressionCodec.SNAPPY, trinoVersion, - false, Optional.of(DateTimeZone.UTC), Optional.empty()); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java index 19e70dc17e1c..570d444b3d61 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSessionProperties.java @@ -105,8 +105,6 @@ public final class HiveSessionProperties private static final String PARQUET_USE_BLOOM_FILTER = "parquet_use_bloom_filter"; private static final String PARQUET_MAX_READ_BLOCK_SIZE = "parquet_max_read_block_size"; private static final String PARQUET_MAX_READ_BLOCK_ROW_COUNT = "parquet_max_read_block_row_count"; - private static final String PARQUET_OPTIMIZED_READER_ENABLED = "parquet_optimized_reader_enabled"; - private static final String PARQUET_OPTIMIZED_NESTED_READER_ENABLED = "parquet_optimized_nested_reader_enabled"; private static final String PARQUET_WRITER_BLOCK_SIZE = "parquet_writer_block_size"; private static final String PARQUET_WRITER_PAGE_SIZE = "parquet_writer_page_size"; private static final String PARQUET_WRITER_BATCH_SIZE = "parquet_writer_batch_size"; @@ -426,16 +424,6 @@ public HiveSessionProperties( } }, false), - booleanProperty( - PARQUET_OPTIMIZED_READER_ENABLED, - "Use optimized Parquet reader", - parquetReaderConfig.isOptimizedReaderEnabled(), - false), - booleanProperty( - PARQUET_OPTIMIZED_NESTED_READER_ENABLED, - "Use optimized Parquet reader for nested columns", - parquetReaderConfig.isOptimizedNestedReaderEnabled(), - false), dataSizeProperty( PARQUET_WRITER_BLOCK_SIZE, "Parquet: Writer block size", @@ -872,16 +860,6 @@ public static int getParquetMaxReadBlockRowCount(ConnectorSession session) return session.getProperty(PARQUET_MAX_READ_BLOCK_ROW_COUNT, Integer.class); } - public static boolean isParquetOptimizedReaderEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_OPTIMIZED_READER_ENABLED, Boolean.class); - } - - public static boolean isParquetOptimizedNestedReaderEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_OPTIMIZED_NESTED_READER_ENABLED, Boolean.class); - } - public static DataSize getParquetWriterBlockSize(ConnectorSession session) { return session.getProperty(PARQUET_WRITER_BLOCK_SIZE, DataSize.class); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriter.java index bf875ac6223b..54368d6787ba 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriter.java @@ -75,7 +75,6 @@ public ParquetFileWriter( int[] fileInputColumnIndexes, CompressionCodec compressionCodec, String trinoVersion, - boolean useBatchColumnReadersForVerification, Optional parquetTimeZone, Optional> validationInputFactory) throws IOException @@ -92,7 +91,6 @@ public ParquetFileWriter( parquetWriterOptions, compressionCodec, trinoVersion, - useBatchColumnReadersForVerification, parquetTimeZone, validationInputFactory.isPresent() ? Optional.of(new ParquetWriteValidationBuilder(fileColumnTypes, fileColumnNames)) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriterFactory.java index d509dc18ae12..49faff70afda 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriterFactory.java @@ -53,7 +53,6 @@ import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR; import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED; import static io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision; -import static io.trino.plugin.hive.HiveSessionProperties.isParquetOptimizedReaderEnabled; import static io.trino.plugin.hive.HiveSessionProperties.isParquetOptimizedWriterValidate; import static io.trino.plugin.hive.util.HiveClassNames.MAPRED_PARQUET_OUTPUT_FORMAT_CLASS; import static io.trino.plugin.hive.util.HiveUtil.getColumnNames; @@ -152,7 +151,6 @@ public Optional createFileWriter( fileInputColumnIndexes, compressionCodec.getParquetCompressionCodec(), nodeVersion.toString(), - isParquetOptimizedReaderEnabled(session), Optional.of(parquetTimeZone), validationInputFactory)); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java index 30b356e11aa6..908aea2697d2 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java @@ -90,8 +90,6 @@ import static io.trino.plugin.hive.HiveSessionProperties.getParquetMaxReadBlockRowCount; import static io.trino.plugin.hive.HiveSessionProperties.getParquetMaxReadBlockSize; import static io.trino.plugin.hive.HiveSessionProperties.isParquetIgnoreStatistics; -import static io.trino.plugin.hive.HiveSessionProperties.isParquetOptimizedNestedReaderEnabled; -import static io.trino.plugin.hive.HiveSessionProperties.isParquetOptimizedReaderEnabled; import static io.trino.plugin.hive.HiveSessionProperties.isParquetUseColumnIndex; import static io.trino.plugin.hive.HiveSessionProperties.isUseParquetColumnNames; import static io.trino.plugin.hive.HiveSessionProperties.useParquetBloomFilter; @@ -194,9 +192,7 @@ public Optional createPageSource( .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) .withUseColumnIndex(isParquetUseColumnIndex(session)) - .withBloomFilter(useParquetBloomFilter(session)) - .withBatchColumnReaders(isParquetOptimizedReaderEnabled(session)) - .withBatchNestedColumnReaders(isParquetOptimizedNestedReaderEnabled(session)), + .withBloomFilter(useParquetBloomFilter(session)), Optional.empty(), domainCompactionThreshold)); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetReaderConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetReaderConfig.java index 187b1790846f..e2df6808981b 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetReaderConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetReaderConfig.java @@ -27,6 +27,8 @@ @DefunctConfig({ "hive.parquet.fail-on-corrupted-statistics", "parquet.fail-on-corrupted-statistics", + "parquet.optimized-reader.enabled", + "parquet.optimized-nested-reader.enabled" }) public class ParquetReaderConfig { @@ -116,32 +118,6 @@ public boolean isUseColumnIndex() return options.isUseColumnIndex(); } - @Config("parquet.optimized-reader.enabled") - @ConfigDescription("Use optimized Parquet reader") - public ParquetReaderConfig setOptimizedReaderEnabled(boolean optimizedReaderEnabled) - { - options = options.withBatchColumnReaders(optimizedReaderEnabled); - return this; - } - - public boolean isOptimizedReaderEnabled() - { - return options.useBatchColumnReaders(); - } - - @Config("parquet.optimized-nested-reader.enabled") - @ConfigDescription("Use optimized Parquet reader for nested columns") - public ParquetReaderConfig setOptimizedNestedReaderEnabled(boolean optimizedNestedReaderEnabled) - { - options = options.withBatchNestedColumnReaders(optimizedNestedReaderEnabled); - return this; - } - - public boolean isOptimizedNestedReaderEnabled() - { - return options.useBatchNestedColumnReaders(); - } - @Config("parquet.use-bloom-filter") @ConfigDescription("Use Parquet Bloom filters") public ParquetReaderConfig setUseBloomFilter(boolean useBloomFilter) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestParquetPageSkipping.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestParquetPageSkipping.java deleted file mode 100644 index 89599ae8382d..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestParquetPageSkipping.java +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.google.common.io.Resources; -import io.trino.Session; -import io.trino.execution.QueryStats; -import io.trino.operator.OperatorStats; -import io.trino.spi.QueryId; -import io.trino.spi.metrics.Count; -import io.trino.spi.metrics.Metric; -import io.trino.testing.AbstractTestQueryFramework; -import io.trino.testing.DistributedQueryRunner; -import io.trino.testing.MaterializedResult; -import io.trino.testing.MaterializedResultWithQueryId; -import org.intellij.lang.annotations.Language; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.io.File; -import java.net.URISyntaxException; -import java.util.Map; - -import static com.google.common.collect.MoreCollectors.onlyElement; -import static io.trino.parquet.reader.ParquetReader.COLUMN_INDEX_ROWS_FILTERED; -import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static java.lang.String.format; -import static org.assertj.core.api.Assertions.assertThat; - -public abstract class AbstractTestParquetPageSkipping - extends AbstractTestQueryFramework -{ - private void buildSortedTables(String tableName, String sortByColumnName, String sortByColumnType) - { - String createTableTemplate = - "CREATE TABLE %s ( " + - " orderkey bigint, " + - " custkey bigint, " + - " orderstatus varchar(1), " + - " totalprice double, " + - " orderdate date, " + - " orderpriority varchar(15), " + - " clerk varchar(15), " + - " shippriority integer, " + - " comment varchar(79), " + - " rvalues double array " + - ") " + - "WITH ( " + - " format = 'PARQUET', " + - " bucketed_by = array['orderstatus'], " + - " bucket_count = 1, " + - " sorted_by = array['%s'] " + - ")"; - createTableTemplate = createTableTemplate.replaceFirst(sortByColumnName + "[ ]+([^,]*)", sortByColumnName + " " + sortByColumnType); - - assertUpdate(format( - createTableTemplate, - tableName, - sortByColumnName)); - String catalog = getSession().getCatalog().orElseThrow(); - assertUpdate( - Session.builder(getSession()) - .setCatalogSessionProperty(catalog, "parquet_writer_page_size", "10000B") - .setCatalogSessionProperty(catalog, "parquet_writer_block_size", "2GB") - .build(), - format("INSERT INTO %s SELECT *, ARRAY[rand(), rand(), rand()] FROM tpch.tiny.orders", tableName), - 15000); - } - - @Test - public void testRowGroupPruningFromPageIndexes() - throws Exception - { - String tableName = "test_row_group_pruning_" + randomNameSuffix(); - File parquetFile = new File(Resources.getResource("parquet_page_skipping/orders_sorted_by_totalprice").toURI()); - assertUpdate( - """ - CREATE TABLE %s ( - orderkey bigint, - custkey bigint, - orderstatus varchar(1), - totalprice double, - orderdate date, - orderpriority varchar(15), - clerk varchar(15), - shippriority integer, - comment varchar(79), - rvalues double array) - WITH ( - format = 'PARQUET', - external_location = '%s') - """.formatted(tableName, parquetFile.getAbsolutePath())); - - int rowCount = assertColumnIndexResults("SELECT * FROM " + tableName + " WHERE totalprice BETWEEN 100000 AND 131280 AND clerk = 'Clerk#000000624'"); - assertThat(rowCount).isGreaterThan(0); - - // `totalprice BETWEEN 51890 AND 51900` is chosen to lie between min/max values of row group - // but outside page level min/max boundaries to trigger pruning of row group using column index - assertRowGroupPruning("SELECT * FROM " + tableName + " WHERE totalprice BETWEEN 51890 AND 51900 AND orderkey > 0"); - assertUpdate("DROP TABLE " + tableName); - } - - @Test - public void testPageSkippingWithNonSequentialOffsets() - throws URISyntaxException - { - String tableName = "test_random_" + randomNameSuffix(); - File parquetFile = new File(Resources.getResource("parquet_page_skipping/random").toURI()); - assertUpdate(format( - "CREATE TABLE %s (col double) WITH (format = 'PARQUET', external_location = '%s')", - tableName, - parquetFile.getAbsolutePath())); - // These queries select a subset of pages which are stored at non-sequential offsets - // This reproduces the issue identified in https://github.com/trinodb/trino/issues/9097 - for (double i = 0; i < 1; i += 0.1) { - assertColumnIndexResults(format("SELECT * FROM %s WHERE col BETWEEN %f AND %f", tableName, i - 0.00001, i + 0.00001)); - } - assertUpdate("DROP TABLE " + tableName); - } - - @Test - public void testFilteringOnColumnNameWithDot() - throws URISyntaxException - { - String nameInSql = "\"a.dot\""; - String tableName = "test_column_name_with_dot_" + randomNameSuffix(); - - File parquetFile = new File(Resources.getResource("parquet_page_skipping/column_name_with_dot").toURI()); - assertUpdate(format( - "CREATE TABLE %s (key varchar(50), %s varchar(50)) WITH (format = 'PARQUET', external_location = '%s')", - tableName, - nameInSql, - parquetFile.getAbsolutePath())); - - assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " IS NULL", "VALUES ('null value')"); - assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " = 'abc'", "VALUES ('sample value')"); - - assertUpdate("DROP TABLE " + tableName); - } - - @Test(dataProvider = "dataType") - public void testPageSkipping(String sortByColumn, String sortByColumnType, Object[][] valuesArray) - { - String tableName = "test_page_skipping_" + randomNameSuffix(); - buildSortedTables(tableName, sortByColumn, sortByColumnType); - for (Object[] values : valuesArray) { - Object lowValue = values[0]; - Object middleLowValue = values[1]; - Object middleHighValue = values[2]; - Object highValue = values[3]; - assertColumnIndexResults(format("SELECT %s FROM %s WHERE %s = %s", sortByColumn, tableName, sortByColumn, middleLowValue)); - assertThat(assertColumnIndexResults(format("SELECT %s FROM %s WHERE %s < %s", sortByColumn, tableName, sortByColumn, lowValue))).isGreaterThan(0); - assertThat(assertColumnIndexResults(format("SELECT %s FROM %s WHERE %s > %s", sortByColumn, tableName, sortByColumn, highValue))).isGreaterThan(0); - assertThat(assertColumnIndexResults(format("SELECT %s FROM %s WHERE %s BETWEEN %s AND %s", sortByColumn, tableName, sortByColumn, middleLowValue, middleHighValue))).isGreaterThan(0); - // Tests synchronization of reading values across columns - assertColumnIndexResults(format("SELECT * FROM %s WHERE %s = %s", tableName, sortByColumn, middleLowValue)); - assertThat(assertColumnIndexResults(format("SELECT * FROM %s WHERE %s < %s", tableName, sortByColumn, lowValue))).isGreaterThan(0); - assertThat(assertColumnIndexResults(format("SELECT * FROM %s WHERE %s > %s", tableName, sortByColumn, highValue))).isGreaterThan(0); - assertThat(assertColumnIndexResults(format("SELECT * FROM %s WHERE %s BETWEEN %s AND %s", tableName, sortByColumn, middleLowValue, middleHighValue))).isGreaterThan(0); - // Nested data - assertColumnIndexResults(format("SELECT rvalues FROM %s WHERE %s IN (%s, %s, %s, %s)", tableName, sortByColumn, lowValue, middleLowValue, middleHighValue, highValue)); - // Without nested data - assertColumnIndexResults(format("SELECT orderkey, orderdate FROM %s WHERE %s IN (%s, %s, %s, %s)", tableName, sortByColumn, lowValue, middleLowValue, middleHighValue, highValue)); - } - assertUpdate("DROP TABLE " + tableName); - } - - @Test - public void testFilteringWithColumnIndex() - throws URISyntaxException - { - String tableName = "test_page_filtering_" + randomNameSuffix(); - File parquetFile = new File(Resources.getResource("parquet_page_skipping/lineitem_sorted_by_suppkey").toURI()); - assertUpdate(format( - "CREATE TABLE %s (suppkey bigint, extendedprice decimal(12, 2), shipmode varchar(10), comment varchar(44)) " + - "WITH (format = 'PARQUET', external_location = '%s')", - tableName, - parquetFile.getAbsolutePath())); - - verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey = 10"); - verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey BETWEEN 25 AND 35"); - verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey >= 60"); - verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey <= 40"); - verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey IN (25, 35, 50, 80)"); - - assertUpdate("DROP TABLE " + tableName); - } - - private void verifyFilteringWithColumnIndex(@Language("SQL") String query) - { - DistributedQueryRunner queryRunner = getDistributedQueryRunner(); - MaterializedResultWithQueryId resultWithoutColumnIndex = queryRunner.executeWithQueryId( - noParquetColumnIndexFiltering(getSession()), - query); - QueryStats queryStatsWithoutColumnIndex = getQueryStats(resultWithoutColumnIndex.getQueryId()); - assertThat(queryStatsWithoutColumnIndex.getPhysicalInputPositions()).isGreaterThan(0); - Map> metricsWithoutColumnIndex = getScanOperatorStats(resultWithoutColumnIndex.getQueryId()) - .getConnectorMetrics() - .getMetrics(); - assertThat(metricsWithoutColumnIndex).doesNotContainKey(COLUMN_INDEX_ROWS_FILTERED); - - MaterializedResultWithQueryId resultWithColumnIndex = queryRunner.executeWithQueryId(getSession(), query); - QueryStats queryStatsWithColumnIndex = getQueryStats(resultWithColumnIndex.getQueryId()); - assertThat(queryStatsWithColumnIndex.getPhysicalInputPositions()).isGreaterThan(0); - assertThat(queryStatsWithColumnIndex.getPhysicalInputPositions()) - .isLessThan(queryStatsWithoutColumnIndex.getPhysicalInputPositions()); - Map> metricsWithColumnIndex = getScanOperatorStats(resultWithColumnIndex.getQueryId()) - .getConnectorMetrics() - .getMetrics(); - assertThat(metricsWithColumnIndex).containsKey(COLUMN_INDEX_ROWS_FILTERED); - assertThat(((Count) metricsWithColumnIndex.get(COLUMN_INDEX_ROWS_FILTERED)).getTotal()) - .isGreaterThan(0); - - assertEqualsIgnoreOrder(resultWithColumnIndex.getResult(), resultWithoutColumnIndex.getResult()); - } - - private int assertColumnIndexResults(String query) - { - MaterializedResult withColumnIndexing = computeActual(query); - MaterializedResult withoutColumnIndexing = computeActual(noParquetColumnIndexFiltering(getSession()), query); - assertEqualsIgnoreOrder(withColumnIndexing, withoutColumnIndexing); - return withoutColumnIndexing.getRowCount(); - } - - private void assertRowGroupPruning(@Language("SQL") String sql) - { - assertQueryStats( - noParquetColumnIndexFiltering(getSession()), - sql, - queryStats -> { - assertThat(queryStats.getPhysicalInputPositions()).isGreaterThan(0); - assertThat(queryStats.getProcessedInputPositions()).isEqualTo(queryStats.getPhysicalInputPositions()); - }, - results -> assertThat(results.getRowCount()).isEqualTo(0)); - - assertQueryStats( - getSession(), - sql, - queryStats -> { - assertThat(queryStats.getPhysicalInputPositions()).isEqualTo(0); - assertThat(queryStats.getProcessedInputPositions()).isEqualTo(0); - }, - results -> assertThat(results.getRowCount()).isEqualTo(0)); - } - - @DataProvider - public Object[][] dataType() - { - return new Object[][] { - {"orderkey", "bigint", new Object[][] {{2, 7520, 7523, 14950}}}, - {"totalprice", "double", new Object[][] {{974.04, 131094.34, 131279.97, 406938.36}}}, - {"totalprice", "real", new Object[][] {{974.04, 131094.34, 131279.97, 406938.36}}}, - {"totalprice", "decimal(12,2)", new Object[][] { - {974.04, 131094.34, 131279.97, 406938.36}, - {973, 131095, 131280, 406950}, - {974.04123, 131094.34123, 131279.97012, 406938.36555}}}, - {"totalprice", "decimal(12,0)", new Object[][] { - {973, 131095, 131280, 406950}}}, - {"totalprice", "decimal(35,2)", new Object[][] { - {974.04, 131094.34, 131279.97, 406938.36}, - {973, 131095, 131280, 406950}, - {974.04123, 131094.34123, 131279.97012, 406938.36555}}}, - {"orderdate", "date", new Object[][] {{"DATE '1992-01-05'", "DATE '1995-10-13'", "DATE '1995-10-13'", "DATE '1998-07-29'"}}}, - {"orderdate", "timestamp", new Object[][] {{"TIMESTAMP '1992-01-05'", "TIMESTAMP '1995-10-13'", "TIMESTAMP '1995-10-14'", "TIMESTAMP '1998-07-29'"}}}, - {"clerk", "varchar(15)", new Object[][] {{"'Clerk#000000006'", "'Clerk#000000508'", "'Clerk#000000513'", "'Clerk#000000996'"}}}, - {"custkey", "integer", new Object[][] {{4, 634, 640, 1493}}}, - {"custkey", "smallint", new Object[][] {{4, 634, 640, 1493}}} - }; - } - - private Session noParquetColumnIndexFiltering(Session session) - { - return Session.builder(session) - .setCatalogSessionProperty(session.getCatalog().orElseThrow(), "parquet_use_column_index", "false") - .build(); - } - - private QueryStats getQueryStats(QueryId queryId) - { - return getDistributedQueryRunner().getCoordinator() - .getQueryManager() - .getFullQueryInfo(queryId) - .getQueryStats(); - } - - private OperatorStats getScanOperatorStats(QueryId queryId) - { - return getQueryStats(queryId) - .getOperatorSummaries() - .stream() - .filter(summary -> summary.getOperatorType().startsWith("TableScan") || summary.getOperatorType().startsWith("Scan")) - .collect(onlyElement()); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java index dedbc21e1d67..782c9723e4d3 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java @@ -156,13 +156,6 @@ public static TestingConnectorSession getHiveSession(HiveConfig hiveConfig, Parq .build(); } - public static TestingConnectorSession getHiveSession(HiveConfig hiveConfig, ParquetReaderConfig parquetReaderConfig) - { - return TestingConnectorSession.builder() - .setPropertyMetadata(getHiveSessionProperties(hiveConfig, parquetReaderConfig).getSessionProperties()) - .build(); - } - public static HiveSessionProperties getHiveSessionProperties(HiveConfig hiveConfig) { return getHiveSessionProperties(hiveConfig, new OrcReaderConfig()); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java index 255a89365d78..34b43d95fe5f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java @@ -14,10 +14,35 @@ package io.trino.plugin.hive; import com.google.common.collect.ImmutableMap; +import com.google.common.io.Resources; +import io.trino.Session; +import io.trino.execution.QueryStats; +import io.trino.operator.OperatorStats; +import io.trino.spi.QueryId; +import io.trino.spi.metrics.Count; +import io.trino.spi.metrics.Metric; +import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; +import io.trino.testing.MaterializedResult; +import io.trino.testing.MaterializedResultWithQueryId; import io.trino.testing.QueryRunner; +import org.intellij.lang.annotations.Language; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.net.URISyntaxException; +import java.util.Map; + +import static com.google.common.collect.MoreCollectors.onlyElement; +import static io.trino.parquet.reader.ParquetReader.COLUMN_INDEX_ROWS_FILTERED; +import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; +import static io.trino.testing.TestingNames.randomNameSuffix; +import static java.lang.String.format; +import static org.assertj.core.api.Assertions.assertThat; public class TestParquetPageSkipping - extends AbstractTestParquetPageSkipping + extends AbstractTestQueryFramework { @Override protected QueryRunner createQueryRunner() @@ -27,8 +52,269 @@ protected QueryRunner createQueryRunner() .setHiveProperties( ImmutableMap.of( "parquet.use-column-index", "true", - "parquet.max-buffer-size", "1MB", - "parquet.optimized-reader.enabled", "false")) + "parquet.max-buffer-size", "1MB")) + .build(); + } + + @Test + public void testRowGroupPruningFromPageIndexes() + throws Exception + { + String tableName = "test_row_group_pruning_" + randomNameSuffix(); + File parquetFile = new File(Resources.getResource("parquet_page_skipping/orders_sorted_by_totalprice").toURI()); + assertUpdate( + """ + CREATE TABLE %s ( + orderkey bigint, + custkey bigint, + orderstatus varchar(1), + totalprice double, + orderdate date, + orderpriority varchar(15), + clerk varchar(15), + shippriority integer, + comment varchar(79), + rvalues double array) + WITH ( + format = 'PARQUET', + external_location = '%s') + """.formatted(tableName, parquetFile.getAbsolutePath())); + + int rowCount = assertColumnIndexResults("SELECT * FROM " + tableName + " WHERE totalprice BETWEEN 100000 AND 131280 AND clerk = 'Clerk#000000624'"); + assertThat(rowCount).isGreaterThan(0); + + // `totalprice BETWEEN 51890 AND 51900` is chosen to lie between min/max values of row group + // but outside page level min/max boundaries to trigger pruning of row group using column index + assertRowGroupPruning("SELECT * FROM " + tableName + " WHERE totalprice BETWEEN 51890 AND 51900 AND orderkey > 0"); + assertUpdate("DROP TABLE " + tableName); + } + + @Test + public void testPageSkippingWithNonSequentialOffsets() + throws URISyntaxException + { + String tableName = "test_random_" + randomNameSuffix(); + File parquetFile = new File(Resources.getResource("parquet_page_skipping/random").toURI()); + assertUpdate(format( + "CREATE TABLE %s (col double) WITH (format = 'PARQUET', external_location = '%s')", + tableName, + parquetFile.getAbsolutePath())); + // These queries select a subset of pages which are stored at non-sequential offsets + // This reproduces the issue identified in https://github.com/trinodb/trino/issues/9097 + for (double i = 0; i < 1; i += 0.1) { + assertColumnIndexResults(format("SELECT * FROM %s WHERE col BETWEEN %f AND %f", tableName, i - 0.00001, i + 0.00001)); + } + assertUpdate("DROP TABLE " + tableName); + } + + @Test + public void testFilteringOnColumnNameWithDot() + throws URISyntaxException + { + String nameInSql = "\"a.dot\""; + String tableName = "test_column_name_with_dot_" + randomNameSuffix(); + + File parquetFile = new File(Resources.getResource("parquet_page_skipping/column_name_with_dot").toURI()); + assertUpdate(format( + "CREATE TABLE %s (key varchar(50), %s varchar(50)) WITH (format = 'PARQUET', external_location = '%s')", + tableName, + nameInSql, + parquetFile.getAbsolutePath())); + + assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " IS NULL", "VALUES ('null value')"); + assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " = 'abc'", "VALUES ('sample value')"); + + assertUpdate("DROP TABLE " + tableName); + } + + @Test(dataProvider = "dataType") + public void testPageSkipping(String sortByColumn, String sortByColumnType, Object[][] valuesArray) + { + String tableName = "test_page_skipping_" + randomNameSuffix(); + buildSortedTables(tableName, sortByColumn, sortByColumnType); + for (Object[] values : valuesArray) { + Object lowValue = values[0]; + Object middleLowValue = values[1]; + Object middleHighValue = values[2]; + Object highValue = values[3]; + assertColumnIndexResults(format("SELECT %s FROM %s WHERE %s = %s", sortByColumn, tableName, sortByColumn, middleLowValue)); + assertThat(assertColumnIndexResults(format("SELECT %s FROM %s WHERE %s < %s", sortByColumn, tableName, sortByColumn, lowValue))).isGreaterThan(0); + assertThat(assertColumnIndexResults(format("SELECT %s FROM %s WHERE %s > %s", sortByColumn, tableName, sortByColumn, highValue))).isGreaterThan(0); + assertThat(assertColumnIndexResults(format("SELECT %s FROM %s WHERE %s BETWEEN %s AND %s", sortByColumn, tableName, sortByColumn, middleLowValue, middleHighValue))).isGreaterThan(0); + // Tests synchronization of reading values across columns + assertColumnIndexResults(format("SELECT * FROM %s WHERE %s = %s", tableName, sortByColumn, middleLowValue)); + assertThat(assertColumnIndexResults(format("SELECT * FROM %s WHERE %s < %s", tableName, sortByColumn, lowValue))).isGreaterThan(0); + assertThat(assertColumnIndexResults(format("SELECT * FROM %s WHERE %s > %s", tableName, sortByColumn, highValue))).isGreaterThan(0); + assertThat(assertColumnIndexResults(format("SELECT * FROM %s WHERE %s BETWEEN %s AND %s", tableName, sortByColumn, middleLowValue, middleHighValue))).isGreaterThan(0); + // Nested data + assertColumnIndexResults(format("SELECT rvalues FROM %s WHERE %s IN (%s, %s, %s, %s)", tableName, sortByColumn, lowValue, middleLowValue, middleHighValue, highValue)); + // Without nested data + assertColumnIndexResults(format("SELECT orderkey, orderdate FROM %s WHERE %s IN (%s, %s, %s, %s)", tableName, sortByColumn, lowValue, middleLowValue, middleHighValue, highValue)); + } + assertUpdate("DROP TABLE " + tableName); + } + + @Test + public void testFilteringWithColumnIndex() + throws URISyntaxException + { + String tableName = "test_page_filtering_" + randomNameSuffix(); + File parquetFile = new File(Resources.getResource("parquet_page_skipping/lineitem_sorted_by_suppkey").toURI()); + assertUpdate(format( + "CREATE TABLE %s (suppkey bigint, extendedprice decimal(12, 2), shipmode varchar(10), comment varchar(44)) " + + "WITH (format = 'PARQUET', external_location = '%s')", + tableName, + parquetFile.getAbsolutePath())); + + verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey = 10"); + verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey BETWEEN 25 AND 35"); + verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey >= 60"); + verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey <= 40"); + verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey IN (25, 35, 50, 80)"); + + assertUpdate("DROP TABLE " + tableName); + } + + private void verifyFilteringWithColumnIndex(@Language("SQL") String query) + { + DistributedQueryRunner queryRunner = getDistributedQueryRunner(); + MaterializedResultWithQueryId resultWithoutColumnIndex = queryRunner.executeWithQueryId( + noParquetColumnIndexFiltering(getSession()), + query); + QueryStats queryStatsWithoutColumnIndex = getQueryStats(resultWithoutColumnIndex.getQueryId()); + assertThat(queryStatsWithoutColumnIndex.getPhysicalInputPositions()).isGreaterThan(0); + Map> metricsWithoutColumnIndex = getScanOperatorStats(resultWithoutColumnIndex.getQueryId()) + .getConnectorMetrics() + .getMetrics(); + assertThat(metricsWithoutColumnIndex).doesNotContainKey(COLUMN_INDEX_ROWS_FILTERED); + + MaterializedResultWithQueryId resultWithColumnIndex = queryRunner.executeWithQueryId(getSession(), query); + QueryStats queryStatsWithColumnIndex = getQueryStats(resultWithColumnIndex.getQueryId()); + assertThat(queryStatsWithColumnIndex.getPhysicalInputPositions()).isGreaterThan(0); + assertThat(queryStatsWithColumnIndex.getPhysicalInputPositions()) + .isLessThan(queryStatsWithoutColumnIndex.getPhysicalInputPositions()); + Map> metricsWithColumnIndex = getScanOperatorStats(resultWithColumnIndex.getQueryId()) + .getConnectorMetrics() + .getMetrics(); + assertThat(metricsWithColumnIndex).containsKey(COLUMN_INDEX_ROWS_FILTERED); + assertThat(((Count) metricsWithColumnIndex.get(COLUMN_INDEX_ROWS_FILTERED)).getTotal()) + .isGreaterThan(0); + + assertEqualsIgnoreOrder(resultWithColumnIndex.getResult(), resultWithoutColumnIndex.getResult()); + } + + private int assertColumnIndexResults(String query) + { + MaterializedResult withColumnIndexing = computeActual(query); + MaterializedResult withoutColumnIndexing = computeActual(noParquetColumnIndexFiltering(getSession()), query); + assertEqualsIgnoreOrder(withColumnIndexing, withoutColumnIndexing); + return withoutColumnIndexing.getRowCount(); + } + + private void assertRowGroupPruning(@Language("SQL") String sql) + { + assertQueryStats( + noParquetColumnIndexFiltering(getSession()), + sql, + queryStats -> { + assertThat(queryStats.getPhysicalInputPositions()).isGreaterThan(0); + assertThat(queryStats.getProcessedInputPositions()).isEqualTo(queryStats.getPhysicalInputPositions()); + }, + results -> assertThat(results.getRowCount()).isEqualTo(0)); + + assertQueryStats( + getSession(), + sql, + queryStats -> { + assertThat(queryStats.getPhysicalInputPositions()).isEqualTo(0); + assertThat(queryStats.getProcessedInputPositions()).isEqualTo(0); + }, + results -> assertThat(results.getRowCount()).isEqualTo(0)); + } + + @DataProvider + public Object[][] dataType() + { + return new Object[][] { + {"orderkey", "bigint", new Object[][] {{2, 7520, 7523, 14950}}}, + {"totalprice", "double", new Object[][] {{974.04, 131094.34, 131279.97, 406938.36}}}, + {"totalprice", "real", new Object[][] {{974.04, 131094.34, 131279.97, 406938.36}}}, + {"totalprice", "decimal(12,2)", new Object[][] { + {974.04, 131094.34, 131279.97, 406938.36}, + {973, 131095, 131280, 406950}, + {974.04123, 131094.34123, 131279.97012, 406938.36555}}}, + {"totalprice", "decimal(12,0)", new Object[][] { + {973, 131095, 131280, 406950}}}, + {"totalprice", "decimal(35,2)", new Object[][] { + {974.04, 131094.34, 131279.97, 406938.36}, + {973, 131095, 131280, 406950}, + {974.04123, 131094.34123, 131279.97012, 406938.36555}}}, + {"orderdate", "date", new Object[][] {{"DATE '1992-01-05'", "DATE '1995-10-13'", "DATE '1995-10-13'", "DATE '1998-07-29'"}}}, + {"orderdate", "timestamp", new Object[][] {{"TIMESTAMP '1992-01-05'", "TIMESTAMP '1995-10-13'", "TIMESTAMP '1995-10-14'", "TIMESTAMP '1998-07-29'"}}}, + {"clerk", "varchar(15)", new Object[][] {{"'Clerk#000000006'", "'Clerk#000000508'", "'Clerk#000000513'", "'Clerk#000000996'"}}}, + {"custkey", "integer", new Object[][] {{4, 634, 640, 1493}}}, + {"custkey", "smallint", new Object[][] {{4, 634, 640, 1493}}} + }; + } + + private Session noParquetColumnIndexFiltering(Session session) + { + return Session.builder(session) + .setCatalogSessionProperty(session.getCatalog().orElseThrow(), "parquet_use_column_index", "false") .build(); } + + private QueryStats getQueryStats(QueryId queryId) + { + return getDistributedQueryRunner().getCoordinator() + .getQueryManager() + .getFullQueryInfo(queryId) + .getQueryStats(); + } + + private OperatorStats getScanOperatorStats(QueryId queryId) + { + return getQueryStats(queryId) + .getOperatorSummaries() + .stream() + .filter(summary -> summary.getOperatorType().startsWith("TableScan") || summary.getOperatorType().startsWith("Scan")) + .collect(onlyElement()); + } + + private void buildSortedTables(String tableName, String sortByColumnName, String sortByColumnType) + { + String createTableTemplate = + "CREATE TABLE %s ( " + + " orderkey bigint, " + + " custkey bigint, " + + " orderstatus varchar(1), " + + " totalprice double, " + + " orderdate date, " + + " orderpriority varchar(15), " + + " clerk varchar(15), " + + " shippriority integer, " + + " comment varchar(79), " + + " rvalues double array " + + ") " + + "WITH ( " + + " format = 'PARQUET', " + + " bucketed_by = array['orderstatus'], " + + " bucket_count = 1, " + + " sorted_by = array['%s'] " + + ")"; + createTableTemplate = createTableTemplate.replaceFirst(sortByColumnName + "[ ]+([^,]*)", sortByColumnName + " " + sortByColumnType); + + assertUpdate(format( + createTableTemplate, + tableName, + sortByColumnName)); + String catalog = getSession().getCatalog().orElseThrow(); + assertUpdate( + Session.builder(getSession()) + .setCatalogSessionProperty(catalog, "parquet_writer_page_size", "10000B") + .setCatalogSessionProperty(catalog, "parquet_writer_block_size", "2GB") + .build(), + format("INSERT INTO %s SELECT *, ARRAY[rand(), rand(), rand()] FROM tpch.tiny.orders", tableName), + 15000); + } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkippingWithOptimizedReader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkippingWithOptimizedReader.java deleted file mode 100644 index f68b673d9137..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkippingWithOptimizedReader.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableMap; -import io.trino.testing.QueryRunner; - -public class TestParquetPageSkippingWithOptimizedReader - extends AbstractTestParquetPageSkipping -{ - @Override - protected QueryRunner createQueryRunner() - throws Exception - { - return HiveQueryRunner.builder() - .setHiveProperties( - ImmutableMap.of( - "parquet.use-column-index", "true", - "parquet.max-buffer-size", "1MB", - "parquet.optimized-reader.enabled", "true")) - .build(); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/BenchmarkFileFormat.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/BenchmarkFileFormat.java index 9d96de38af1d..6dd6bb4f4554 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/BenchmarkFileFormat.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/BenchmarkFileFormat.java @@ -22,7 +22,6 @@ public enum BenchmarkFileFormat TRINO_RCTEXT(StandardFileFormats.TRINO_RCTEXT), TRINO_ORC(StandardFileFormats.TRINO_ORC), TRINO_PARQUET(StandardFileFormats.TRINO_PARQUET), - TRINO_OPTIMIZED_PARQUET(StandardFileFormats.TRINO_PARQUET), /**/; private final FileFormat format; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/BenchmarkHiveFileFormat.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/BenchmarkHiveFileFormat.java index 4670320d0e55..6740ceb873a4 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/BenchmarkHiveFileFormat.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/BenchmarkHiveFileFormat.java @@ -18,7 +18,6 @@ import io.trino.hadoop.HadoopNative; import io.trino.plugin.hive.HiveCompressionCodec; import io.trino.plugin.hive.HiveConfig; -import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.spi.Page; import io.trino.spi.PageBuilder; import io.trino.spi.block.ArrayBlockBuilder; @@ -58,7 +57,6 @@ import static io.trino.jmh.Benchmarks.benchmark; import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.HiveTestUtils.getHiveSession; -import static io.trino.plugin.hive.benchmark.BenchmarkFileFormat.TRINO_OPTIMIZED_PARQUET; import static io.trino.plugin.hive.benchmark.BenchmarkFileFormatsUtils.MIN_DATA_SIZE; import static io.trino.plugin.hive.benchmark.BenchmarkFileFormatsUtils.createTempDir; import static io.trino.plugin.hive.benchmark.BenchmarkFileFormatsUtils.createTpchDataSet; @@ -80,11 +78,7 @@ @SuppressWarnings({"UseOfSystemOutOrSystemErr", "ResultOfMethodCallIgnored"}) public class BenchmarkHiveFileFormat { - private static final ConnectorSession SESSION = getHiveSession( - new HiveConfig(), new ParquetReaderConfig().setOptimizedReaderEnabled(false)); - - private static final ConnectorSession SESSION_OPTIMIZED_PARQUET_READER = getHiveSession( - new HiveConfig(), new ParquetReaderConfig().setOptimizedReaderEnabled(true)); + private static final ConnectorSession SESSION = getHiveSession(new HiveConfig()); static { HadoopNative.requireHadoopNative(); @@ -114,8 +108,7 @@ public class BenchmarkHiveFileFormat "TRINO_RCBINARY", "TRINO_RCTEXT", "TRINO_ORC", - "TRINO_PARQUET", - "TRINO_OPTIMIZED_PARQUET"}) + "TRINO_PARQUET"}) private BenchmarkFileFormat benchmarkFileFormat; private FileFormat fileFormat; @@ -172,7 +165,7 @@ public List read(CompressionCounter counter) } List pages = new ArrayList<>(100); try (ConnectorPageSource pageSource = fileFormat.createFileFormatReader( - TRINO_OPTIMIZED_PARQUET.equals(benchmarkFileFormat) ? SESSION_OPTIMIZED_PARQUET_READER : SESSION, + SESSION, HDFS_ENVIRONMENT, dataFile, data.getColumnNames(), diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java index ee450abc9663..404249cb6ff4 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java @@ -206,7 +206,6 @@ public PrestoParquetFormatWriter(File targetFile, List columnNames, List ParquetWriterOptions.builder().build(), compressionCodec.getParquetCompressionCodec(), "test-version", - false, Optional.of(DateTimeZone.getDefault()), Optional.empty()); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/TestHiveFileFormatBenchmark.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/TestHiveFileFormatBenchmark.java index 275cabcc5aac..71934c6c02bb 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/TestHiveFileFormatBenchmark.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/TestHiveFileFormatBenchmark.java @@ -21,8 +21,8 @@ import java.io.IOException; import static io.trino.plugin.hive.HiveCompressionCodec.SNAPPY; -import static io.trino.plugin.hive.benchmark.BenchmarkFileFormat.TRINO_OPTIMIZED_PARQUET; import static io.trino.plugin.hive.benchmark.BenchmarkFileFormat.TRINO_ORC; +import static io.trino.plugin.hive.benchmark.BenchmarkFileFormat.TRINO_PARQUET; import static io.trino.plugin.hive.benchmark.BenchmarkFileFormat.TRINO_RCBINARY; import static io.trino.plugin.hive.benchmark.BenchmarkHiveFileFormat.DataSet.LARGE_MAP_VARCHAR_DOUBLE; import static io.trino.plugin.hive.benchmark.BenchmarkHiveFileFormat.DataSet.LINEITEM; @@ -36,7 +36,7 @@ public void testSomeFormats() { executeBenchmark(LINEITEM, SNAPPY, TRINO_RCBINARY); executeBenchmark(LINEITEM, SNAPPY, TRINO_ORC); - executeBenchmark(LINEITEM, SNAPPY, TRINO_OPTIMIZED_PARQUET); + executeBenchmark(LINEITEM, SNAPPY, TRINO_PARQUET); executeBenchmark(MAP_VARCHAR_DOUBLE, SNAPPY, TRINO_RCBINARY); executeBenchmark(MAP_VARCHAR_DOUBLE, SNAPPY, TRINO_ORC); executeBenchmark(LARGE_MAP_VARCHAR_DOUBLE, SNAPPY, TRINO_RCBINARY); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetTester.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetTester.java index 5042ab9eb738..c3883e325b10 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetTester.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetTester.java @@ -157,11 +157,7 @@ public class ParquetTester { private static final int MAX_PRECISION_INT64 = toIntExact(maxPrecision(8)); - private static final ConnectorSession SESSION = getHiveSession( - createHiveConfig(false), new ParquetReaderConfig().setOptimizedReaderEnabled(false)); - - private static final ConnectorSession SESSION_OPTIMIZED_READER = getHiveSession( - createHiveConfig(false), new ParquetReaderConfig().setOptimizedReaderEnabled(true)); + private static final ConnectorSession SESSION = getHiveSession(createHiveConfig(false)); private static final ConnectorSession SESSION_USE_NAME = getHiveSession(createHiveConfig(true)); @@ -187,23 +183,13 @@ public static ParquetTester quickParquetTester() StandardFileFormats.TRINO_PARQUET); } - public static ParquetTester quickOptimizedParquetTester() - { - return new ParquetTester( - ImmutableSet.of(GZIP), - ImmutableSet.of(GZIP), - ImmutableSet.of(PARQUET_1_0), - ImmutableSet.of(SESSION_OPTIMIZED_READER), - StandardFileFormats.TRINO_PARQUET); - } - public static ParquetTester fullParquetTester() { return new ParquetTester( ImmutableSet.of(GZIP, UNCOMPRESSED, SNAPPY, LZO, LZ4, ZSTD), ImmutableSet.of(GZIP, UNCOMPRESSED, SNAPPY, ZSTD), ImmutableSet.copyOf(WriterVersion.values()), - ImmutableSet.of(SESSION, SESSION_USE_NAME, SESSION_OPTIMIZED_READER), + ImmutableSet.of(SESSION, SESSION_USE_NAME), StandardFileFormats.TRINO_PARQUET); } @@ -447,52 +433,49 @@ void assertMaxReadBytes( throws Exception { CompressionCodec compressionCodec = UNCOMPRESSED; - for (boolean optimizedReaderEnabled : ImmutableList.of(true, false)) { - HiveSessionProperties hiveSessionProperties = new HiveSessionProperties( - new HiveConfig() - .setHiveStorageFormat(HiveStorageFormat.PARQUET) - .setUseParquetColumnNames(false), - new HiveFormatsConfig(), - new OrcReaderConfig(), - new OrcWriterConfig(), - new ParquetReaderConfig() - .setMaxReadBlockSize(maxReadBlockSize) - .setOptimizedReaderEnabled(optimizedReaderEnabled), - new ParquetWriterConfig()); - ConnectorSession session = TestingConnectorSession.builder() - .setPropertyMetadata(hiveSessionProperties.getSessionProperties()) - .build(); - - try (TempFile tempFile = new TempFile("test", "parquet")) { - JobConf jobConf = new JobConf(newEmptyConfiguration()); - jobConf.setEnum(COMPRESSION, compressionCodec); - jobConf.setBoolean(ENABLE_DICTIONARY, true); - jobConf.setEnum(WRITER_VERSION, PARQUET_1_0); - writeParquetColumn( - jobConf, - tempFile.getFile(), - compressionCodec, - createTableProperties(columnNames, objectInspectors), - getStandardStructObjectInspector(columnNames, objectInspectors), - getIterators(writeValues), - parquetSchema, - false, - DateTimeZone.getDefault()); - - Iterator[] expectedValues = getIterators(readValues); - try (ConnectorPageSource pageSource = fileFormat.createFileFormatReader( - session, - HDFS_ENVIRONMENT, - tempFile.getFile(), - columnNames, - columnTypes)) { - assertPageSource( - columnTypes, - expectedValues, - pageSource, - Optional.of(getParquetMaxReadBlockSize(session).toBytes())); - assertFalse(stream(expectedValues).allMatch(Iterator::hasNext)); - } + HiveSessionProperties hiveSessionProperties = new HiveSessionProperties( + new HiveConfig() + .setHiveStorageFormat(HiveStorageFormat.PARQUET) + .setUseParquetColumnNames(false), + new HiveFormatsConfig(), + new OrcReaderConfig(), + new OrcWriterConfig(), + new ParquetReaderConfig() + .setMaxReadBlockSize(maxReadBlockSize), + new ParquetWriterConfig()); + ConnectorSession session = TestingConnectorSession.builder() + .setPropertyMetadata(hiveSessionProperties.getSessionProperties()) + .build(); + + try (TempFile tempFile = new TempFile("test", "parquet")) { + JobConf jobConf = new JobConf(newEmptyConfiguration()); + jobConf.setEnum(COMPRESSION, compressionCodec); + jobConf.setBoolean(ENABLE_DICTIONARY, true); + jobConf.setEnum(WRITER_VERSION, PARQUET_1_0); + writeParquetColumn( + jobConf, + tempFile.getFile(), + compressionCodec, + createTableProperties(columnNames, objectInspectors), + getStandardStructObjectInspector(columnNames, objectInspectors), + getIterators(writeValues), + parquetSchema, + false, + DateTimeZone.getDefault()); + + Iterator[] expectedValues = getIterators(readValues); + try (ConnectorPageSource pageSource = fileFormat.createFileFormatReader( + session, + HDFS_ENVIRONMENT, + tempFile.getFile(), + columnNames, + columnTypes)) { + assertPageSource( + columnTypes, + expectedValues, + pageSource, + Optional.of(getParquetMaxReadBlockSize(session).toBytes())); + assertFalse(stream(expectedValues).allMatch(Iterator::hasNext)); } } } @@ -793,7 +776,6 @@ private static void writeParquetColumnTrino( .build(), compressionCodec, "test-version", - false, Optional.of(DateTimeZone.getDefault()), Optional.of(new ParquetWriteValidationBuilder(types, columnNames))); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestOptimizedParquetReader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestOptimizedParquetReader.java deleted file mode 100644 index 3894ba8029c7..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestOptimizedParquetReader.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.parquet; - -import org.testng.annotations.Test; - -// Failing on multiple threads because of org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper -// uses a single record writer across all threads. -// For example org.apache.parquet.column.values.factory.DefaultValuesWriterFactory#DEFAULT_V1_WRITER_FACTORY is shared mutable state. -@Test(singleThreaded = true) -public class TestOptimizedParquetReader - extends AbstractTestParquetReader -{ - public TestOptimizedParquetReader() - { - super(ParquetTester.quickOptimizedParquetTester()); - } - - @Test - public void forceTestNgToRespectSingleThreaded() - { - // TODO: Remove after updating TestNG to 7.4.0+ (https://github.com/trinodb/trino/issues/8571) - // TestNG doesn't enforce @Test(singleThreaded = true) when tests are defined in base class. According to - // https://github.com/cbeust/testng/issues/2361#issuecomment-688393166 a workaround it to add a dummy test to the leaf test class. - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java index 982e4a8b8154..56aa96534f7a 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java @@ -15,7 +15,6 @@ import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; -import io.trino.Session; import io.trino.plugin.hive.HiveQueryRunner; import io.trino.plugin.hive.parquet.write.TestingMapredParquetOutputFormat; import io.trino.testing.AbstractTestQueryFramework; @@ -305,8 +304,7 @@ public void testReadingNonRescalableDecimals( @Language("SQL") String query = format("SELECT * FROM tpch.%s", tableName); @Language("RegExp") String expectedMessage = format("Cannot cast DECIMAL\\(%d, %d\\) '.*' to DECIMAL\\(%d, %d\\)", precision, scale, schemaPrecision, schemaScale); - assertQueryFails(optimizedParquetReaderEnabled(false), query, expectedMessage); - assertQueryFails(optimizedParquetReaderEnabled(true), query, expectedMessage); + assertQueryFails(query, expectedMessage); dropTable(tableName); } @@ -360,8 +358,7 @@ public void testParquetLongFixedLenByteArrayWithTrinoShortDecimal( "Could not read unscaled value %s into a short decimal from column .*", new BigDecimal(writeValue).unscaledValue()); - assertQueryFails(optimizedParquetReaderEnabled(false), query, expectedMessage); - assertQueryFails(optimizedParquetReaderEnabled(true), query, expectedMessage); + assertQueryFails(query, expectedMessage); } else { assertValues(tableName, schemaScale, ImmutableList.of(writeValue)); @@ -398,13 +395,7 @@ protected void dropTable(String tableName) private void assertValues(String tableName, int scale, List expected) { - assertValues(optimizedParquetReaderEnabled(false), tableName, scale, expected); - assertValues(optimizedParquetReaderEnabled(true), tableName, scale, expected); - } - - private void assertValues(Session session, String tableName, int scale, List expected) - { - MaterializedResult materializedRows = computeActual(session, format("SELECT value FROM tpch.%s", tableName)); + MaterializedResult materializedRows = computeActual(format("SELECT value FROM tpch.%s", tableName)); List actualValues = materializedRows.getMaterializedRows().stream() .map(row -> row.getField(0)) @@ -420,13 +411,7 @@ private void assertValues(Session session, String tableName, int scale, List expected) { - assertRoundedValues(optimizedParquetReaderEnabled(false), tableName, scale, expected); - assertRoundedValues(optimizedParquetReaderEnabled(true), tableName, scale, expected); - } - - private void assertRoundedValues(Session session, String tableName, int scale, List expected) - { - MaterializedResult materializedRows = computeActual(session, format("SELECT value FROM tpch.%s", tableName)); + MaterializedResult materializedRows = computeActual(format("SELECT value FROM tpch.%s", tableName)); List actualValues = materializedRows.getMaterializedRows().stream() .map(row -> row.getField(0)) @@ -539,14 +524,6 @@ private static Object[][] withWriterVersion(Object[][] args) return cartesianProduct(args, versions); } - private Session optimizedParquetReaderEnabled(boolean enabled) - { - Session session = getSession(); - return Session.builder(session) - .setCatalogSessionProperty(session.getCatalog().orElseThrow(), "parquet_optimized_reader_enabled", Boolean.toString(enabled)) - .build(); - } - protected static class ParquetDecimalInsert { private final String columnName; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetReaderConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetReaderConfig.java index afa1f038ffb1..fc8975936cba 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetReaderConfig.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetReaderConfig.java @@ -37,8 +37,6 @@ public void testDefaults() .setMaxMergeDistance(DataSize.of(1, MEGABYTE)) .setMaxBufferSize(DataSize.of(8, MEGABYTE)) .setUseColumnIndex(true) - .setOptimizedReaderEnabled(true) - .setOptimizedNestedReaderEnabled(true) .setUseBloomFilter(true)); } @@ -52,8 +50,6 @@ public void testExplicitPropertyMappings() .put("parquet.max-buffer-size", "1431kB") .put("parquet.max-merge-distance", "342kB") .put("parquet.use-column-index", "false") - .put("parquet.optimized-reader.enabled", "false") - .put("parquet.optimized-nested-reader.enabled", "false") .put("parquet.use-bloom-filter", "false") .buildOrThrow(); @@ -64,8 +60,6 @@ public void testExplicitPropertyMappings() .setMaxBufferSize(DataSize.of(1431, KILOBYTE)) .setMaxMergeDistance(DataSize.of(342, KILOBYTE)) .setUseColumnIndex(false) - .setOptimizedReaderEnabled(false) - .setOptimizedNestedReaderEnabled(false) .setUseBloomFilter(false); assertFullMapping(properties, expected); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestTimestamp.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestTimestamp.java index 93e96e0027a7..2d7cd1f5ca2c 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestTimestamp.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestTimestamp.java @@ -128,11 +128,7 @@ private static void testRoundTrip(MessageType parquetSchema, Iterable writ false, DateTimeZone.getDefault()); - ConnectorSession session = getHiveSession(new HiveConfig(), new ParquetReaderConfig().setOptimizedReaderEnabled(false)); - testReadingAs(createTimestampType(timestamp.getPrecision()), session, tempFile, columnNames, timestampReadValues); - testReadingAs(BIGINT, session, tempFile, columnNames, writeValues); - - session = getHiveSession(new HiveConfig(), new ParquetReaderConfig().setOptimizedReaderEnabled(true)); + ConnectorSession session = getHiveSession(new HiveConfig()); testReadingAs(createTimestampType(timestamp.getPrecision()), session, tempFile, columnNames, timestampReadValues); testReadingAs(BIGINT, session, tempFile, columnNames, writeValues); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestTimestampMicros.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestTimestampMicros.java index 5d9f20ad13aa..9f1a9584f330 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestTimestampMicros.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestTimestampMicros.java @@ -47,7 +47,6 @@ import static io.trino.plugin.hive.HiveType.HIVE_TIMESTAMP; import static io.trino.spi.type.TimestampType.createTimestampType; import static io.trino.spi.type.TimestampWithTimeZoneType.createTimestampWithTimeZoneType; -import static io.trino.testing.DataProviders.cartesianProduct; import static io.trino.testing.MaterializedResult.materializeSourceDataStream; import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB; import static org.assertj.core.api.Assertions.assertThat; @@ -55,12 +54,10 @@ public class TestTimestampMicros { @Test(dataProvider = "testTimestampMicrosDataProvider") - public void testTimestampMicros(HiveTimestampPrecision timestampPrecision, LocalDateTime expected, boolean useOptimizedParquetReader) + public void testTimestampMicros(HiveTimestampPrecision timestampPrecision, LocalDateTime expected) throws Exception { - ConnectorSession session = getHiveSession( - new HiveConfig().setTimestampPrecision(timestampPrecision), - new ParquetReaderConfig().setOptimizedReaderEnabled(useOptimizedParquetReader)); + ConnectorSession session = getHiveSession(new HiveConfig().setTimestampPrecision(timestampPrecision)); File parquetFile = new File(Resources.getResource("issue-5483.parquet").toURI()); Type columnType = createTimestampType(timestampPrecision.getPrecision()); @@ -73,12 +70,10 @@ public void testTimestampMicros(HiveTimestampPrecision timestampPrecision, Local } @Test(dataProvider = "testTimestampMicrosDataProvider") - public void testTimestampMicrosAsTimestampWithTimeZone(HiveTimestampPrecision timestampPrecision, LocalDateTime expected, boolean useOptimizedParquetReader) + public void testTimestampMicrosAsTimestampWithTimeZone(HiveTimestampPrecision timestampPrecision, LocalDateTime expected) throws Exception { - ConnectorSession session = getHiveSession( - new HiveConfig().setTimestampPrecision(timestampPrecision), - new ParquetReaderConfig().setOptimizedReaderEnabled(useOptimizedParquetReader)); + ConnectorSession session = getHiveSession(new HiveConfig().setTimestampPrecision(timestampPrecision)); File parquetFile = new File(Resources.getResource("issue-5483.parquet").toURI()); Type columnType = createTimestampWithTimeZoneType(timestampPrecision.getPrecision()); @@ -93,13 +88,10 @@ public void testTimestampMicrosAsTimestampWithTimeZone(HiveTimestampPrecision ti @DataProvider public static Object[][] testTimestampMicrosDataProvider() { - return cartesianProduct( - new Object[][] { - {HiveTimestampPrecision.MILLISECONDS, LocalDateTime.parse("2020-10-12T16:26:02.907")}, - {HiveTimestampPrecision.MICROSECONDS, LocalDateTime.parse("2020-10-12T16:26:02.906668")}, - {HiveTimestampPrecision.NANOSECONDS, LocalDateTime.parse("2020-10-12T16:26:02.906668")}, - }, - new Object[][] {{true}, {false}}); + return new Object[][] { + {HiveTimestampPrecision.MILLISECONDS, LocalDateTime.parse("2020-10-12T16:26:02.907")}, + {HiveTimestampPrecision.MICROSECONDS, LocalDateTime.parse("2020-10-12T16:26:02.906668")}, + {HiveTimestampPrecision.NANOSECONDS, LocalDateTime.parse("2020-10-12T16:26:02.906668")}}; } private ConnectorPageSource createPageSource(ConnectorSession session, File parquetFile, String columnName, HiveType columnHiveType, Type columnType) diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java index 9e888c6a6a93..b38ba3e02a81 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java @@ -86,8 +86,6 @@ import static io.trino.plugin.hudi.HudiErrorCode.HUDI_CURSOR_ERROR; import static io.trino.plugin.hudi.HudiErrorCode.HUDI_INVALID_PARTITION_VALUE; import static io.trino.plugin.hudi.HudiErrorCode.HUDI_UNSUPPORTED_FILE_FORMAT; -import static io.trino.plugin.hudi.HudiSessionProperties.isParquetOptimizedNestedReaderEnabled; -import static io.trino.plugin.hudi.HudiSessionProperties.isParquetOptimizedReaderEnabled; import static io.trino.plugin.hudi.HudiSessionProperties.shouldUseParquetColumnNames; import static io.trino.plugin.hudi.HudiUtil.getHudiFileFormat; import static io.trino.spi.predicate.Utils.nativeValueToBlock; @@ -167,8 +165,7 @@ public ConnectorPageSource createPageSource( split, inputFile, dataSourceStats, - options.withBatchColumnReaders(isParquetOptimizedReaderEnabled(session)) - .withBatchNestedColumnReaders(isParquetOptimizedNestedReaderEnabled(session)), + options, timeZone); return new HudiPageSource( diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSessionProperties.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSessionProperties.java index bb5400b1e17e..7d76398f6fe7 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSessionProperties.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSessionProperties.java @@ -17,7 +17,6 @@ import com.google.inject.Inject; import io.airlift.units.DataSize; import io.trino.plugin.base.session.SessionPropertiesProvider; -import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.session.PropertyMetadata; @@ -42,8 +41,6 @@ public class HudiSessionProperties private static final String COLUMNS_TO_HIDE = "columns_to_hide"; private static final String METADATA_ENABLED = "metadata_enabled"; private static final String USE_PARQUET_COLUMN_NAMES = "use_parquet_column_names"; - private static final String PARQUET_OPTIMIZED_READER_ENABLED = "parquet_optimized_reader_enabled"; - private static final String PARQUET_OPTIMIZED_NESTED_READER_ENABLED = "parquet_optimized_nested_reader_enabled"; private static final String SIZE_BASED_SPLIT_WEIGHTS_ENABLED = "size_based_split_weights_enabled"; private static final String STANDARD_SPLIT_WEIGHT_SIZE = "standard_split_weight_size"; private static final String MINIMUM_ASSIGNED_SPLIT_WEIGHT = "minimum_assigned_split_weight"; @@ -54,7 +51,7 @@ public class HudiSessionProperties private final List> sessionProperties; @Inject - public HudiSessionProperties(HudiConfig hudiConfig, ParquetReaderConfig parquetReaderConfig) + public HudiSessionProperties(HudiConfig hudiConfig) { sessionProperties = ImmutableList.of( new PropertyMetadata<>( @@ -78,16 +75,6 @@ public HudiSessionProperties(HudiConfig hudiConfig, ParquetReaderConfig parquetR "Access parquet columns using names from the file. If disabled, then columns are accessed using index.", hudiConfig.getUseParquetColumnNames(), false), - booleanProperty( - PARQUET_OPTIMIZED_READER_ENABLED, - "Use optimized Parquet reader", - parquetReaderConfig.isOptimizedReaderEnabled(), - false), - booleanProperty( - PARQUET_OPTIMIZED_NESTED_READER_ENABLED, - "Use optimized Parquet reader for nested columns", - parquetReaderConfig.isOptimizedNestedReaderEnabled(), - false), booleanProperty( SIZE_BASED_SPLIT_WEIGHTS_ENABLED, format("If enabled, size-based splitting ensures that each batch of splits has enough data to process as defined by %s", STANDARD_SPLIT_WEIGHT_SIZE), @@ -147,16 +134,6 @@ public static boolean shouldUseParquetColumnNames(ConnectorSession session) return session.getProperty(USE_PARQUET_COLUMN_NAMES, Boolean.class); } - public static boolean isParquetOptimizedReaderEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_OPTIMIZED_READER_ENABLED, Boolean.class); - } - - public static boolean isParquetOptimizedNestedReaderEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_OPTIMIZED_NESTED_READER_ENABLED, Boolean.class); - } - public static boolean isSizeBasedSplitWeightsEnabled(ConnectorSession session) { return session.getProperty(SIZE_BASED_SPLIT_WEIGHTS_ENABLED, Boolean.class); diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiSessionProperties.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiSessionProperties.java index 3cde567ca44b..5220921922fa 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiSessionProperties.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/TestHudiSessionProperties.java @@ -14,7 +14,6 @@ package io.trino.plugin.hudi; import com.google.common.collect.ImmutableList; -import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.spi.connector.ConnectorSession; import io.trino.testing.TestingConnectorSession; import org.testng.annotations.Test; @@ -31,7 +30,7 @@ public void testSessionPropertyColumnsToHide() { HudiConfig config = new HudiConfig() .setColumnsToHide("col1, col2"); - HudiSessionProperties sessionProperties = new HudiSessionProperties(config, new ParquetReaderConfig()); + HudiSessionProperties sessionProperties = new HudiSessionProperties(config); ConnectorSession session = TestingConnectorSession.builder() .setPropertyMetadata(sessionProperties.getSessionProperties()) .build(); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java index 259ea8d5db6d..999464846629 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java @@ -158,8 +158,6 @@ import static io.trino.plugin.iceberg.IcebergSessionProperties.getParquetMaxReadBlockSize; import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled; import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcNestedLazy; -import static io.trino.plugin.iceberg.IcebergSessionProperties.isParquetOptimizedNestedReaderEnabled; -import static io.trino.plugin.iceberg.IcebergSessionProperties.isParquetOptimizedReaderEnabled; import static io.trino.plugin.iceberg.IcebergSessionProperties.isUseFileSizeFromMetadata; import static io.trino.plugin.iceberg.IcebergSessionProperties.useParquetBloomFilter; import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; @@ -504,9 +502,7 @@ public ReaderPageSourceWithRowPositions createDataPageSource( parquetReaderOptions .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) - .withBatchColumnReaders(isParquetOptimizedReaderEnabled(session)) - .withBloomFilter(useParquetBloomFilter(session)) - .withBatchNestedColumnReaders(isParquetOptimizedNestedReaderEnabled(session)), + .withBloomFilter(useParquetBloomFilter(session)), predicate, fileFormatDataSourceStats, nameMapping, diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergParquetFileWriter.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergParquetFileWriter.java index 104598c8dd0a..81441f4e648f 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergParquetFileWriter.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergParquetFileWriter.java @@ -66,7 +66,6 @@ public IcebergParquetFileWriter( fileInputColumnIndexes, compressionCodec, trinoVersion, - false, Optional.empty(), Optional.empty()); this.metricsConfig = requireNonNull(metricsConfig, "metricsConfig is null"); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSessionProperties.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSessionProperties.java index a607c52b37cd..9a639b285da7 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSessionProperties.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSessionProperties.java @@ -73,8 +73,6 @@ public final class IcebergSessionProperties private static final String PARQUET_MAX_READ_BLOCK_SIZE = "parquet_max_read_block_size"; private static final String PARQUET_USE_BLOOM_FILTER = "parquet_use_bloom_filter"; private static final String PARQUET_MAX_READ_BLOCK_ROW_COUNT = "parquet_max_read_block_row_count"; - private static final String PARQUET_OPTIMIZED_READER_ENABLED = "parquet_optimized_reader_enabled"; - private static final String PARQUET_OPTIMIZED_NESTED_READER_ENABLED = "parquet_optimized_nested_reader_enabled"; private static final String PARQUET_WRITER_BLOCK_SIZE = "parquet_writer_block_size"; private static final String PARQUET_WRITER_PAGE_SIZE = "parquet_writer_page_size"; private static final String PARQUET_WRITER_BATCH_SIZE = "parquet_writer_batch_size"; @@ -219,16 +217,6 @@ public IcebergSessionProperties( } }, false)) - .add(booleanProperty( - PARQUET_OPTIMIZED_READER_ENABLED, - "Use optimized Parquet reader", - parquetReaderConfig.isOptimizedReaderEnabled(), - false)) - .add(booleanProperty( - PARQUET_OPTIMIZED_NESTED_READER_ENABLED, - "Use optimized Parquet reader for nested columns", - parquetReaderConfig.isOptimizedNestedReaderEnabled(), - false)) .add(dataSizeProperty( PARQUET_WRITER_BLOCK_SIZE, "Parquet: Writer block size", @@ -422,16 +410,6 @@ public static int getParquetMaxReadBlockRowCount(ConnectorSession session) return session.getProperty(PARQUET_MAX_READ_BLOCK_ROW_COUNT, Integer.class); } - public static boolean isParquetOptimizedReaderEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_OPTIMIZED_READER_ENABLED, Boolean.class); - } - - public static boolean isParquetOptimizedNestedReaderEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_OPTIMIZED_NESTED_READER_ENABLED, Boolean.class); - } - public static DataSize getParquetWriterPageSize(ConnectorSession session) { return session.getProperty(PARQUET_WRITER_PAGE_SIZE, DataSize.class);