diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueExpressionUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueExpressionUtil.java index 031fe34c68b4..152af4f7131d 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueExpressionUtil.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueExpressionUtil.java @@ -147,6 +147,14 @@ static Optional buildGlueExpressionForSingleDomain(String columnName, Do return Optional.empty(); } + // Glue throws an exception (e.g. input string: "__HIVE_D" is not an integer) + // for column <> '__HIVE_DEFAULT_PARTITION__' or column = '__HIVE_DEFAULT_PARTITION__' expression on numeric types + // "IS NULL" operator in the official documentation always returns empty result regardless of the type. + // https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html#aws-glue-api-catalog-partitions-GetPartitions + if ((domain.getValues().isAll() || domain.getValues().isNone()) && !isQuotedType(domain.getType())) { + return Optional.empty(); + } + if (domain.getValues().isAll()) { return Optional.of(format("(%s <> '%s')", columnName, NULL_STRING)); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java index 3025e3f47df5..8ef5ef7fb0e4 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java @@ -60,10 +60,13 @@ import io.trino.spi.type.DateType; import io.trino.spi.type.IntegerType; import io.trino.spi.type.SmallintType; +import io.trino.spi.type.TimestampType; import io.trino.spi.type.TinyintType; +import io.trino.spi.type.Type; import io.trino.spi.type.VarcharType; import io.trino.testing.MaterializedResult; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -167,6 +170,10 @@ public class TestHiveGlueMetastore .addAll(CREATE_TABLE_COLUMNS) .add(new ColumnMetadata(PARTITION_KEY, DateType.DATE)) .build(); + private static final List CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP = ImmutableList.builder() + .addAll(CREATE_TABLE_COLUMNS) + .add(new ColumnMetadata(PARTITION_KEY, TimestampType.TIMESTAMP_MILLIS)) + .build(); private static final List VARCHAR_PARTITION_VALUES = ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"); protected static final HiveBasicStatistics HIVE_BASIC_STATISTICS = new HiveBasicStatistics(1000, 5000, 3000, 4000); @@ -828,6 +835,7 @@ public void testGetPartitionsFilterIsNullWithValue() .addDomain(PARTITION_KEY, Domain.onlyNull(VarcharType.VARCHAR)) .build(); List partitionList = new ArrayList<>(); + partitionList.add("100"); partitionList.add(null); doGetPartitionsFilterTest( CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, @@ -837,6 +845,83 @@ public void testGetPartitionsFilterIsNullWithValue() ImmutableList.of(ImmutableList.of(GlueExpressionUtil.NULL_STRING))); } + @Test + public void testGetPartitionsFilterIsNotNull() + throws Exception + { + TupleDomain isNotNullFilter = new PartitionFilterBuilder() + .addDomain(PARTITION_KEY, Domain.notNull(VarcharType.VARCHAR)) + .build(); + List partitionList = new ArrayList<>(); + partitionList.add("100"); + partitionList.add(null); + + doGetPartitionsFilterTest( + CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, + PARTITION_KEY, + partitionList, + ImmutableList.of(isNotNullFilter), + ImmutableList.of(ImmutableList.of("100"))); + } + + @Test(dataProvider = "unsupportedNullPushdownTypes") + public void testGetPartitionsFilterUnsupportedIsNull(List columnMetadata, Type type, String partitionValue) + throws Exception + { + TupleDomain isNullFilter = new PartitionFilterBuilder() + .addDomain(PARTITION_KEY, Domain.onlyNull(type)) + .build(); + List partitionList = new ArrayList<>(); + partitionList.add(partitionValue); + partitionList.add(null); + + doGetPartitionsFilterTest( + columnMetadata, + PARTITION_KEY, + partitionList, + ImmutableList.of(isNullFilter), + // Currently, we get NULL partition from Glue and filter it in our side because + // (column = '__HIVE_DEFAULT_PARTITION__') on numeric types causes exception on Glue. e.g. 'input string: "__HIVE_D" is not an integer' + ImmutableList.of(ImmutableList.of(partitionValue, GlueExpressionUtil.NULL_STRING))); + } + + @Test(dataProvider = "unsupportedNullPushdownTypes") + public void testGetPartitionsFilterUnsupportedIsNotNull(List columnMetadata, Type type, String partitionValue) + throws Exception + { + TupleDomain isNotNullFilter = new PartitionFilterBuilder() + .addDomain(PARTITION_KEY, Domain.notNull(type)) + .build(); + List partitionList = new ArrayList<>(); + partitionList.add(partitionValue); + partitionList.add(null); + + doGetPartitionsFilterTest( + columnMetadata, + PARTITION_KEY, + partitionList, + ImmutableList.of(isNotNullFilter), + // Currently, we get NULL partition from Glue and filter it in our side because + // (column <> '__HIVE_DEFAULT_PARTITION__') on numeric types causes exception on Glue. e.g. 'input string: "__HIVE_D" is not an integer' + ImmutableList.of(ImmutableList.of(partitionValue, GlueExpressionUtil.NULL_STRING))); + } + + @DataProvider + public Object[][] unsupportedNullPushdownTypes() + { + return new Object[][] { + // Numeric types are unsupported for IS (NOT) NULL predicate pushdown + {CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, TinyintType.TINYINT, "127"}, + {CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT, SmallintType.SMALLINT, "32767"}, + {CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER, IntegerType.INTEGER, "2147483647"}, + {CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT, BigintType.BIGINT, "9223372036854775807"}, + {CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL, DECIMAL_TYPE, "12345.12345"}, + // Date and timestamp aren't numeric types, but the pushdown is unsupported because of GlueExpressionUtil.canConvertSqlTypeToStringForGlue + {CREATE_TABLE_COLUMNS_PARTITIONED_DATE, DateType.DATE, "2022-07-11"}, + {CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP, TimestampType.TIMESTAMP_MILLIS, "2022-07-11 01:02:03.123"}, + }; + } + @Test public void testUpdateStatisticsOnCreate() {