# Patch summary (commentary only; these lines precede the first "diff --git" header and belong to no hunk):
# - TupleDomainParquetPredicate: matches(...) and convertToParquetFilter(...) now `continue` past any column
#   for which the new private helper isColumnIndexStatsSupported(...) returns false, i.e. whose
#   PrimitiveType column order name is not TYPE_DEFINED_ORDER. In matches(...) the guard runs before the
#   columnIndexStore.getColumnIndex(...) lookup; in convertToParquetFilter(...) it runs before the
#   per-column FilterPredicate is built.
# - TestParquetPageSkipping: adds testUnsupportedColumnIndex, which creates an external table over the
#   resource directory parquet_page_skipping/unsupported_column_index, runs two filtered queries and drops
#   the table (regression test for https://github.com/trinodb/trino/issues/16801).
# - issue-16801.parquet is recorded only as "Binary files ... differ"; its bytes are not carried here.
# NOTE(review): the line layout was rebuilt from the @@ hunk counts; leading whitespace on hunk lines is
# reconstructed and should be confirmed against the target tree before applying.
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java
index 7f23d4a0fa7b..6dcc6b0743ff 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java
@@ -47,6 +47,7 @@
 import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore;
 import org.apache.parquet.io.ParquetDecodingException;
 import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.ColumnOrder;
 import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation;
 import org.apache.parquet.schema.PrimitiveType;
@@ -207,6 +208,10 @@ public boolean matches(Map valueCounts, ColumnIndexStore
                 continue;
             }
 
+            // ParquetMetadataConverter#fromParquetColumnIndex returns null if the parquet primitive type does not support min/max stats
+            if (!isColumnIndexStatsSupported(column.getPrimitiveType())) {
+                continue;
+            }
             ColumnIndex columnIndex = columnIndexStore.getColumnIndex(ColumnPath.get(column.getPath()));
             if (columnIndex == null) {
                 continue;
@@ -685,6 +690,11 @@ private FilterPredicate convertToParquetFilter(DateTimeZone timeZone)
                 continue;
             }
 
+            // ParquetMetadataConverter#fromParquetColumnIndex returns null if the parquet primitive type does not support min/max stats
+            if (!isColumnIndexStatsSupported(column.getPrimitiveType())) {
+                continue;
+            }
+
             FilterPredicate columnFilter = FilterApi.userDefined(
                     new TrinoIntColumn(ColumnPath.get(column.getPath())),
                     new DomainUserDefinedPredicate<>(column, domain, timeZone));
@@ -808,4 +818,10 @@ private static final class TrinoIntColumn
             super(columnPath, Integer.class);
         }
     }
+
+    // Copy of org.apache.parquet.format.converter.ParquetMetadataConverter#isMinMaxStatsSupported
+    private static boolean isColumnIndexStatsSupported(PrimitiveType type)
+    {
+        return type.columnOrder().getColumnOrderName() == ColumnOrder.ColumnOrderName.TYPE_DEFINED_ORDER;
+    }
 }
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java
index 0ffa7cd64f7f..c3e41ac1881d 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestParquetPageSkipping.java
@@ -126,6 +126,30 @@ public void testFilteringOnColumnNameWithDot()
         assertUpdate("DROP TABLE " + tableName);
     }
 
+    @Test
+    public void testUnsupportedColumnIndex()
+            throws URISyntaxException
+    {
+        String tableName = "test_unsupported_column_index_" + randomNameSuffix();
+
+        // Test for https://github.com/trinodb/trino/issues/16801
+        File parquetFile = new File(Resources.getResource("parquet_page_skipping/unsupported_column_index").toURI());
+        assertUpdate(format(
+                "CREATE TABLE %s (stime timestamp(3), btime timestamp(3), detail varchar) WITH (format = 'PARQUET', external_location = '%s')",
+                tableName,
+                parquetFile.getAbsolutePath()));
+
+        assertQuery(
+                "SELECT * FROM " + tableName + " WHERE btime >= timestamp '2023-03-27 13:30:00'",
+                "VALUES ('2023-03-31 18:00:00.000', '2023-03-31 18:00:00.000', 'record_1')");
+
+        assertQuery(
+                "SELECT * FROM " + tableName + " WHERE detail = 'record_2'",
+                "VALUES ('2023-03-31 18:00:00.000', null, 'record_2')");
+
+        assertUpdate("DROP TABLE " + tableName);
+    }
+
     @Test
     public void testPageSkipping()
     {
diff --git a/plugin/trino-hive/src/test/resources/parquet_page_skipping/unsupported_column_index/issue-16801.parquet b/plugin/trino-hive/src/test/resources/parquet_page_skipping/unsupported_column_index/issue-16801.parquet
new file mode 100644
index 000000000000..6e69c4e945df
Binary files /dev/null and b/plugin/trino-hive/src/test/resources/parquet_page_skipping/unsupported_column_index/issue-16801.parquet differ