diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java index 55410295dd05..1f9203d0671d 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java @@ -324,7 +324,8 @@ public Map getTableColumnStatistics(String databas .stopOnIllegalExceptions() .run("getTableColumnStatistics", stats.getGetTableColumnStatistics().wrap(() -> { try (ThriftMetastoreClient client = createMetastoreClient()) { - return groupStatisticsByColumn(client.getTableColumnStatistics(databaseName, tableName, ImmutableList.copyOf(columnNames))); + List tableColumnStatistics = client.getTableColumnStatistics(databaseName, tableName, ImmutableList.copyOf(columnNames)); + return groupStatisticsByColumn(databaseName, tableName, tableColumnStatistics); } })); } @@ -346,7 +347,7 @@ public Map> getPartitionColumnStatisti .filter(entry -> !entry.getValue().isEmpty()) .collect(toImmutableMap( Map.Entry::getKey, - entry -> groupStatisticsByColumn(entry.getValue()))); + entry -> groupStatisticsByColumn(databaseName, tableName, entry.getValue()))); } @Override @@ -402,11 +403,11 @@ private Map> getPartitionColumnStatistics(Stri } } - private static Map groupStatisticsByColumn(List statistics) + private static Map groupStatisticsByColumn(String databaseName, String tableName, List statistics) { Map statisticsByColumn = new HashMap<>(); for (ColumnStatisticsObj stats : statistics) { - HiveColumnStatistics newColumnStatistics = ThriftMetastoreUtil.fromMetastoreApiColumnStatistics(stats); + HiveColumnStatistics newColumnStatistics = ThriftMetastoreUtil.fromMetastoreApiColumnStatistics(databaseName, tableName, stats); if (statisticsByColumn.containsKey(stats.getColName())) { HiveColumnStatistics existingColumnStatistics = statisticsByColumn.get(stats.getColName()); if (!newColumnStatistics.equals(existingColumnStatistics)) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java index 77a7b47e924f..32ef8f92682c 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java @@ -23,6 +23,7 @@ import com.google.common.primitives.Shorts; import io.airlift.compress.v3.zstd.ZstdDecompressor; import io.airlift.json.JsonCodec; +import io.airlift.log.Logger; import io.trino.hive.thrift.metastore.BinaryColumnStatsData; import io.trino.hive.thrift.metastore.BooleanColumnStatsData; import io.trino.hive.thrift.metastore.ColumnStatisticsObj; @@ -165,6 +166,8 @@ public final class ThriftMetastoreUtil { + private static final Logger log = Logger.get(ThriftMetastoreUtil.class); + private static final JsonCodec LANGUAGE_FUNCTION_CODEC = jsonCodec(LanguageFunction.class); private static final String PUBLIC_ROLE_NAME = "public"; private static final String ADMIN_ROLE_NAME = "admin"; @@ -331,11 +334,11 @@ public static Stream listEnabledRoles(ConnectorIdentity identity, Functi } return Stream.concat( - roles, - listApplicableRoles(principal, listRoleGrants) - .map(RoleGrant::getRoleName) - // The admin role must be enabled explicitly. If it is, it was added above. - .filter(Predicate.isEqual(ADMIN_ROLE_NAME).negate())) + roles, + listApplicableRoles(principal, listRoleGrants) + .map(RoleGrant::getRoleName) + // The admin role must be enabled explicitly. If it is, it was added above. + .filter(Predicate.isEqual(ADMIN_ROLE_NAME).negate())) // listApplicableRoles may return role which was already added explicitly above. .distinct(); } @@ -435,8 +438,8 @@ public static boolean isAvroTableWithSchemaSet(io.trino.hive.thrift.metastore.Ta return serdeInfo.getSerializationLib() != null && ((table.getParameters().get(AVRO_SCHEMA_URL_KEY) != null || (serdeInfo.getParameters() != null && serdeInfo.getParameters().get(AVRO_SCHEMA_URL_KEY) != null)) || - (table.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null || - (serdeInfo.getParameters() != null && serdeInfo.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null))) && + (table.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null || + (serdeInfo.getParameters() != null && serdeInfo.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null))) && serdeInfo.getSerializationLib().equals(AVRO.getSerde()); } @@ -533,7 +536,7 @@ public static Partition fromMetastoreApiPartition(io.trino.hive.thrift.metastore * Both formats store values as seconds since epoch in HMS, which we convert to microseconds * for Trino's internal representation. */ - public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatisticsObj columnStatistics) + public static HiveColumnStatistics fromMetastoreApiColumnStatistics(String databaseName, String tableName, ColumnStatisticsObj columnStatistics) { if (columnStatistics.getStatsData().isSetLongStats()) { LongColumnStatsData longStatsData = columnStatistics.getStatsData().getLongStats(); @@ -623,7 +626,17 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis OptionalLong distinctValuesWithNullCount = timestampStatsData.isSetNumDVs() ? OptionalLong.of(timestampStatsData.getNumDVs()) : OptionalLong.empty(); return createIntegerColumnStatistics(min, max, nullsCount, distinctValuesWithNullCount); } - throw new TrinoException(HIVE_INVALID_METADATA, "Invalid column statistics data: " + columnStatistics); + log.warn("Unsupported column statistics data in table %s.%s: %s", databaseName, tableName, columnStatistics); + return new HiveColumnStatistics( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + OptionalLong.empty(), + OptionalDouble.empty(), + OptionalLong.empty(), + OptionalLong.empty()); } private static Optional fromMetastoreDate(Date date) @@ -1050,7 +1063,7 @@ public static boolean isAvroTableWithSchemaSet(Table table) return AVRO.getSerde().equals(table.getStorage().getStorageFormat().getSerDeNullable()) && ((table.getParameters().get(AVRO_SCHEMA_URL_KEY) != null || (table.getStorage().getSerdeParameters().get(AVRO_SCHEMA_URL_KEY) != null)) || - (table.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null || - (table.getStorage().getSerdeParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null))); + (table.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null || + (table.getStorage().getSerdeParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null))); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftMetastoreUtil.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftMetastoreUtil.java index e2297be2f189..d302ca8c9508 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftMetastoreUtil.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftMetastoreUtil.java @@ -275,7 +275,7 @@ public void testLongStatsToColumnStatistics() longColumnStatsData.setNumNulls(1); longColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(longColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setIntegerStatistics(new IntegerStatistics(OptionalLong.of(0), OptionalLong.of(100))) @@ -289,7 +289,7 @@ public void testEmptyLongStatsToColumnStatistics() { LongColumnStatsData emptyLongColumnStatsData = new LongColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(emptyLongColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setIntegerStatistics(new IntegerStatistics(OptionalLong.empty(), OptionalLong.empty())) @@ -305,7 +305,7 @@ public void testDoubleStatsToColumnStatistics() doubleColumnStatsData.setNumNulls(1); doubleColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(doubleColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(100))) @@ -319,7 +319,7 @@ public void testEmptyDoubleStatsToColumnStatistics() { DoubleColumnStatsData emptyDoubleColumnStatsData = new DoubleColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(emptyDoubleColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty())) @@ -337,7 +337,7 @@ public void testDecimalStatsToColumnStatistics() decimalColumnStatsData.setNumNulls(1); decimalColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(decimalColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setDecimalStatistics(new DecimalStatistics(Optional.of(low), Optional.of(high))) @@ -351,7 +351,7 @@ public void testEmptyDecimalStatsToColumnStatistics() { DecimalColumnStatsData emptyDecimalColumnStatsData = new DecimalColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(emptyDecimalColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())) @@ -366,7 +366,7 @@ public void testBooleanStatsToColumnStatistics() booleanColumnStatsData.setNumFalses(10); booleanColumnStatsData.setNumNulls(0); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(booleanColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setBooleanStatistics(new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10))) @@ -379,7 +379,7 @@ public void testImpalaGeneratedBooleanStatistics() { BooleanColumnStatsData statsData = new BooleanColumnStatsData(1L, -1L, 2L); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(statsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())) @@ -392,7 +392,7 @@ public void testEmptyBooleanStatsToColumnStatistics() { BooleanColumnStatsData emptyBooleanColumnStatsData = new BooleanColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(emptyBooleanColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())) @@ -408,7 +408,7 @@ public void testDateStatsToColumnStatistics() dateColumnStatsData.setNumNulls(1); dateColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(dateColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1000)), Optional.of(LocalDate.ofEpochDay(2000)))) @@ -422,7 +422,7 @@ public void testEmptyDateStatsToColumnStatistics() { DateColumnStatsData emptyDateColumnStatsData = new DateColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(emptyDateColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())) @@ -438,7 +438,7 @@ public void testStringStatsToColumnStatistics() stringColumnStatsData.setNumNulls(1); stringColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", STRING_TYPE_NAME, stringStats(stringColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setMaxValueSizeInBytes(100) @@ -453,7 +453,7 @@ public void testEmptyStringColumnStatsData() { StringColumnStatsData emptyStringColumnStatsData = new StringColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", STRING_TYPE_NAME, stringStats(emptyStringColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder().build()); } @@ -466,7 +466,7 @@ public void testBinaryStatsToColumnStatistics() binaryColumnStatsData.setAvgColLen(22.2); binaryColumnStatsData.setNumNulls(2); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BINARY_TYPE_NAME, binaryStats(binaryColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder() .setMaxValueSizeInBytes(100) @@ -480,7 +480,7 @@ public void testEmptyBinaryStatsToColumnStatistics() { BinaryColumnStatsData emptyBinaryColumnStatsData = new BinaryColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BINARY_TYPE_NAME, binaryStats(emptyBinaryColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual).isEqualTo(HiveColumnStatistics.builder().build()); } @@ -492,7 +492,7 @@ public void testSingleDistinctValue() doubleColumnStatsData.setNumNulls(10); doubleColumnStatsData.setNumDVs(1); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(doubleColumnStatsData)); - HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual.getNullsCount()).isEqualTo(OptionalLong.of(10)); assertThat(actual.getDistinctValuesWithNullCount()).isEqualTo(OptionalLong.of(1)); @@ -501,7 +501,7 @@ public void testSingleDistinctValue() doubleColumnStatsData.setNumNulls(10); doubleColumnStatsData.setNumDVs(1); columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(doubleColumnStatsData)); - actual = fromMetastoreApiColumnStatistics(columnStatisticsObj); + actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj); assertThat(actual.getNullsCount()).isEqualTo(OptionalLong.of(10)); assertThat(actual.getDistinctValuesWithNullCount()).isEqualTo(OptionalLong.of(1));