Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,14 @@ static Optional<String> buildGlueExpressionForSingleDomain(String columnName, Do
return Optional.empty();
}

// Glue throws an exception (e.g. input string: "__HIVE_D" is not an integer)
Comment thread
ebyhr marked this conversation as resolved.
Outdated
// for column <> '__HIVE_DEFAULT_PARTITION__' or column = '__HIVE_DEFAULT_PARTITION__' expression on numeric types
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there is any specific reason on not using NOT or IS NULL logical operator provided by Glue ? It was mentioned here - but any reason on not using it https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html#aws-glue-api-catalog-partitions-GetPartitions

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@findinpath and I confirmed the expression returns incorrect result. Let me leave a code comment.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar behavior is shown even for numeric type ?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it behaves same regardless of the types. (We already shared this issue with AWS engineers)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Praveen2112 Added a comment. Please take another look.

// "IS NULL" operator in the official documentation always returns empty result regardless of the type.
// https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html#aws-glue-api-catalog-partitions-GetPartitions
if ((domain.getValues().isAll() || domain.getValues().isNone()) && !isQuotedType(domain.getType())) {
return Optional.empty();
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code below doesn't handle the IS NULL OR IS some value case correctly.
-- It's apparent from the fact it "recognizes" IS NULL case by inspecting valueSet.isNone() (which is quite implicit), instead of checking domain.isNullAllowed(). Thus, it doesn't discern "some values" and "some values OR NULL" cases.

#13214 should make it clear (it doesn't solve the problem just yet)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your explanation. I was going to handle IS NULL OR IS some value case in #13122 after this PR.


if (domain.getValues().isAll()) {
return Optional.of(format("(%s <> '%s')", columnName, NULL_STRING));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,13 @@
import io.trino.spi.type.DateType;
import io.trino.spi.type.IntegerType;
import io.trino.spi.type.SmallintType;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.TinyintType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;
import io.trino.testing.MaterializedResult;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
Expand Down Expand Up @@ -167,6 +170,10 @@ public class TestHiveGlueMetastore
.addAll(CREATE_TABLE_COLUMNS)
.add(new ColumnMetadata(PARTITION_KEY, DateType.DATE))
.build();
private static final List<ColumnMetadata> CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP = ImmutableList.<ColumnMetadata>builder()
.addAll(CREATE_TABLE_COLUMNS)
.add(new ColumnMetadata(PARTITION_KEY, TimestampType.TIMESTAMP_MILLIS))
.build();
private static final List<String> VARCHAR_PARTITION_VALUES = ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01");

protected static final HiveBasicStatistics HIVE_BASIC_STATISTICS = new HiveBasicStatistics(1000, 5000, 3000, 4000);
Expand Down Expand Up @@ -828,6 +835,7 @@ public void testGetPartitionsFilterIsNullWithValue()
.addDomain(PARTITION_KEY, Domain.onlyNull(VarcharType.VARCHAR))
.build();
List<String> partitionList = new ArrayList<>();
partitionList.add("100");
partitionList.add(null);
doGetPartitionsFilterTest(
CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR,
Expand All @@ -837,6 +845,83 @@ public void testGetPartitionsFilterIsNullWithValue()
ImmutableList.of(ImmutableList.of(GlueExpressionUtil.NULL_STRING)));
}

@Test
public void testGetPartitionsFilterIsNotNull()
throws Exception
{
TupleDomain<String> isNotNullFilter = new PartitionFilterBuilder()
.addDomain(PARTITION_KEY, Domain.notNull(VarcharType.VARCHAR))
.build();
List<String> partitionList = new ArrayList<>();
partitionList.add("100");
partitionList.add(null);

doGetPartitionsFilterTest(
CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR,
PARTITION_KEY,
partitionList,
ImmutableList.of(isNotNullFilter),
ImmutableList.of(ImmutableList.of("100")));
}

@Test(dataProvider = "unsupportedNullPushdownTypes")
public void testGetPartitionsFilterUnsupportedIsNull(List<ColumnMetadata> columnMetadata, Type type, String partitionValue)
throws Exception
{
TupleDomain<String> isNullFilter = new PartitionFilterBuilder()
.addDomain(PARTITION_KEY, Domain.onlyNull(type))
.build();
List<String> partitionList = new ArrayList<>();
partitionList.add(partitionValue);
partitionList.add(null);

doGetPartitionsFilterTest(
columnMetadata,
PARTITION_KEY,
partitionList,
ImmutableList.of(isNullFilter),
// Currently, we get NULL partition from Glue and filter it in our side because
// (column = '__HIVE_DEFAULT_PARTITION__') on numeric types causes exception on Glue. e.g. 'input string: "__HIVE_D" is not an integer'
ImmutableList.of(ImmutableList.of(partitionValue, GlueExpressionUtil.NULL_STRING)));
}

@Test(dataProvider = "unsupportedNullPushdownTypes")
public void testGetPartitionsFilterUnsupportedIsNotNull(List<ColumnMetadata> columnMetadata, Type type, String partitionValue)
throws Exception
{
TupleDomain<String> isNotNullFilter = new PartitionFilterBuilder()
.addDomain(PARTITION_KEY, Domain.notNull(type))
.build();
List<String> partitionList = new ArrayList<>();
partitionList.add(partitionValue);
partitionList.add(null);

doGetPartitionsFilterTest(
columnMetadata,
PARTITION_KEY,
partitionList,
ImmutableList.of(isNotNullFilter),
// Currently, we get NULL partition from Glue and filter it in our side because
// (column <> '__HIVE_DEFAULT_PARTITION__') on numeric types causes exception on Glue. e.g. 'input string: "__HIVE_D" is not an integer'
ImmutableList.of(ImmutableList.of(partitionValue, GlueExpressionUtil.NULL_STRING)));
}

@DataProvider
public Object[][] unsupportedNullPushdownTypes()
{
return new Object[][] {
// Numeric types are unsupported for IS (NOT) NULL predicate pushdown
{CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, TinyintType.TINYINT, "127"},
{CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT, SmallintType.SMALLINT, "32767"},
{CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER, IntegerType.INTEGER, "2147483647"},
{CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT, BigintType.BIGINT, "9223372036854775807"},
{CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL, DECIMAL_TYPE, "12345.12345"},
// Date and timestamp aren't numeric types, but the pushdown is unsupported because of GlueExpressionUtil.canConvertSqlTypeToStringForGlue
{CREATE_TABLE_COLUMNS_PARTITIONED_DATE, DateType.DATE, "2022-07-11"},
{CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP, TimestampType.TIMESTAMP_MILLIS, "2022-07-11 01:02:03.123"},
};
}

@Test
public void testUpdateStatisticsOnCreate()
{
Expand Down