Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,8 @@ public Map<String, HiveColumnStatistics> getTableColumnStatistics(String databas
.stopOnIllegalExceptions()
.run("getTableColumnStatistics", stats.getGetTableColumnStatistics().wrap(() -> {
try (ThriftMetastoreClient client = createMetastoreClient()) {
return groupStatisticsByColumn(client.getTableColumnStatistics(databaseName, tableName, ImmutableList.copyOf(columnNames)));
List<ColumnStatisticsObj> tableColumnStatistics = client.getTableColumnStatistics(databaseName, tableName, ImmutableList.copyOf(columnNames));
return groupStatisticsByColumn(databaseName, tableName, tableColumnStatistics);
}
}));
}
Expand All @@ -346,7 +347,7 @@ public Map<String, Map<String, HiveColumnStatistics>> getPartitionColumnStatisti
.filter(entry -> !entry.getValue().isEmpty())
.collect(toImmutableMap(
Map.Entry::getKey,
entry -> groupStatisticsByColumn(entry.getValue())));
entry -> groupStatisticsByColumn(databaseName, tableName, entry.getValue())));
}

@Override
Expand Down Expand Up @@ -402,11 +403,11 @@ private Map<String, List<ColumnStatisticsObj>> getPartitionColumnStatistics(Stri
}
}

private static Map<String, HiveColumnStatistics> groupStatisticsByColumn(List<ColumnStatisticsObj> statistics)
private static Map<String, HiveColumnStatistics> groupStatisticsByColumn(String databaseName, String tableName, List<ColumnStatisticsObj> statistics)
{
Map<String, HiveColumnStatistics> statisticsByColumn = new HashMap<>();
for (ColumnStatisticsObj stats : statistics) {
HiveColumnStatistics newColumnStatistics = ThriftMetastoreUtil.fromMetastoreApiColumnStatistics(stats);
HiveColumnStatistics newColumnStatistics = ThriftMetastoreUtil.fromMetastoreApiColumnStatistics(databaseName, tableName, stats);
if (statisticsByColumn.containsKey(stats.getColName())) {
HiveColumnStatistics existingColumnStatistics = statisticsByColumn.get(stats.getColName());
if (!newColumnStatistics.equals(existingColumnStatistics)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.google.common.primitives.Shorts;
import io.airlift.compress.v3.zstd.ZstdDecompressor;
import io.airlift.json.JsonCodec;
import io.airlift.log.Logger;
import io.trino.hive.thrift.metastore.BinaryColumnStatsData;
import io.trino.hive.thrift.metastore.BooleanColumnStatsData;
import io.trino.hive.thrift.metastore.ColumnStatisticsObj;
Expand Down Expand Up @@ -165,6 +166,8 @@

public final class ThriftMetastoreUtil
{
private static final Logger log = Logger.get(ThriftMetastoreUtil.class);

private static final JsonCodec<LanguageFunction> LANGUAGE_FUNCTION_CODEC = jsonCodec(LanguageFunction.class);
private static final String PUBLIC_ROLE_NAME = "public";
private static final String ADMIN_ROLE_NAME = "admin";
Expand Down Expand Up @@ -331,11 +334,11 @@ public static Stream<String> listEnabledRoles(ConnectorIdentity identity, Functi
}

return Stream.concat(
roles,
listApplicableRoles(principal, listRoleGrants)
.map(RoleGrant::getRoleName)
// The admin role must be enabled explicitly. If it is, it was added above.
.filter(Predicate.isEqual(ADMIN_ROLE_NAME).negate()))
roles,
listApplicableRoles(principal, listRoleGrants)
.map(RoleGrant::getRoleName)
// The admin role must be enabled explicitly. If it is, it was added above.
.filter(Predicate.isEqual(ADMIN_ROLE_NAME).negate()))
// listApplicableRoles may return role which was already added explicitly above.
.distinct();
}
Expand Down Expand Up @@ -435,8 +438,8 @@ public static boolean isAvroTableWithSchemaSet(io.trino.hive.thrift.metastore.Ta
return serdeInfo.getSerializationLib() != null &&
((table.getParameters().get(AVRO_SCHEMA_URL_KEY) != null ||
(serdeInfo.getParameters() != null && serdeInfo.getParameters().get(AVRO_SCHEMA_URL_KEY) != null)) ||
(table.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null ||
(serdeInfo.getParameters() != null && serdeInfo.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null))) &&
(table.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null ||
(serdeInfo.getParameters() != null && serdeInfo.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null))) &&
serdeInfo.getSerializationLib().equals(AVRO.getSerde());
}

Expand Down Expand Up @@ -533,7 +536,7 @@ public static Partition fromMetastoreApiPartition(io.trino.hive.thrift.metastore
* Both formats store values as seconds since epoch in HMS, which we convert to microseconds
* for Trino's internal representation.
*/
public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatisticsObj columnStatistics)
public static HiveColumnStatistics fromMetastoreApiColumnStatistics(String databaseName, String tableName, ColumnStatisticsObj columnStatistics)
{
if (columnStatistics.getStatsData().isSetLongStats()) {
LongColumnStatsData longStatsData = columnStatistics.getStatsData().getLongStats();
Expand Down Expand Up @@ -623,7 +626,17 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis
OptionalLong distinctValuesWithNullCount = timestampStatsData.isSetNumDVs() ? OptionalLong.of(timestampStatsData.getNumDVs()) : OptionalLong.empty();
return createIntegerColumnStatistics(min, max, nullsCount, distinctValuesWithNullCount);
}
throw new TrinoException(HIVE_INVALID_METADATA, "Invalid column statistics data: " + columnStatistics);
log.warn("Unsupported column statistics data in table %s.%s: %s", databaseName, tableName, columnStatistics);
return new HiveColumnStatistics(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pls add test coverage in io.trino.plugin.hive.metastore.thrift.TestThriftMetastoreUtil

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's covered by product test. I will need your help with more coverage. Do you want to take over this PR?

Optional.empty(),
Optional.empty(),
Optional.empty(),
Optional.empty(),
Optional.empty(),
OptionalLong.empty(),
OptionalDouble.empty(),
OptionalLong.empty(),
OptionalLong.empty());
}

private static Optional<LocalDate> fromMetastoreDate(Date date)
Expand Down Expand Up @@ -1050,7 +1063,7 @@ public static boolean isAvroTableWithSchemaSet(Table table)
return AVRO.getSerde().equals(table.getStorage().getStorageFormat().getSerDeNullable()) &&
((table.getParameters().get(AVRO_SCHEMA_URL_KEY) != null ||
(table.getStorage().getSerdeParameters().get(AVRO_SCHEMA_URL_KEY) != null)) ||
(table.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null ||
(table.getStorage().getSerdeParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null)));
(table.getParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null ||
(table.getStorage().getSerdeParameters().get(AVRO_SCHEMA_LITERAL_KEY) != null)));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ public void testLongStatsToColumnStatistics()
longColumnStatsData.setNumNulls(1);
longColumnStatsData.setNumDVs(20);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(longColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setIntegerStatistics(new IntegerStatistics(OptionalLong.of(0), OptionalLong.of(100)))
Expand All @@ -289,7 +289,7 @@ public void testEmptyLongStatsToColumnStatistics()
{
LongColumnStatsData emptyLongColumnStatsData = new LongColumnStatsData();
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(emptyLongColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setIntegerStatistics(new IntegerStatistics(OptionalLong.empty(), OptionalLong.empty()))
Expand All @@ -305,7 +305,7 @@ public void testDoubleStatsToColumnStatistics()
doubleColumnStatsData.setNumNulls(1);
doubleColumnStatsData.setNumDVs(20);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(doubleColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(100)))
Expand All @@ -319,7 +319,7 @@ public void testEmptyDoubleStatsToColumnStatistics()
{
DoubleColumnStatsData emptyDoubleColumnStatsData = new DoubleColumnStatsData();
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(emptyDoubleColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()))
Expand All @@ -337,7 +337,7 @@ public void testDecimalStatsToColumnStatistics()
decimalColumnStatsData.setNumNulls(1);
decimalColumnStatsData.setNumDVs(20);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(decimalColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setDecimalStatistics(new DecimalStatistics(Optional.of(low), Optional.of(high)))
Expand All @@ -351,7 +351,7 @@ public void testEmptyDecimalStatsToColumnStatistics()
{
DecimalColumnStatsData emptyDecimalColumnStatsData = new DecimalColumnStatsData();
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(emptyDecimalColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty()))
Expand All @@ -366,7 +366,7 @@ public void testBooleanStatsToColumnStatistics()
booleanColumnStatsData.setNumFalses(10);
booleanColumnStatsData.setNumNulls(0);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(booleanColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10)))
Expand All @@ -379,7 +379,7 @@ public void testImpalaGeneratedBooleanStatistics()
{
BooleanColumnStatsData statsData = new BooleanColumnStatsData(1L, -1L, 2L);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(statsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
Expand All @@ -392,7 +392,7 @@ public void testEmptyBooleanStatsToColumnStatistics()
{
BooleanColumnStatsData emptyBooleanColumnStatsData = new BooleanColumnStatsData();
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(emptyBooleanColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
Expand All @@ -408,7 +408,7 @@ public void testDateStatsToColumnStatistics()
dateColumnStatsData.setNumNulls(1);
dateColumnStatsData.setNumDVs(20);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(dateColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1000)), Optional.of(LocalDate.ofEpochDay(2000))))
Expand All @@ -422,7 +422,7 @@ public void testEmptyDateStatsToColumnStatistics()
{
DateColumnStatsData emptyDateColumnStatsData = new DateColumnStatsData();
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(emptyDateColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty()))
Expand All @@ -438,7 +438,7 @@ public void testStringStatsToColumnStatistics()
stringColumnStatsData.setNumNulls(1);
stringColumnStatsData.setNumDVs(20);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", STRING_TYPE_NAME, stringStats(stringColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setMaxValueSizeInBytes(100)
Expand All @@ -453,7 +453,7 @@ public void testEmptyStringColumnStatsData()
{
StringColumnStatsData emptyStringColumnStatsData = new StringColumnStatsData();
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", STRING_TYPE_NAME, stringStats(emptyStringColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder().build());
}
Expand All @@ -466,7 +466,7 @@ public void testBinaryStatsToColumnStatistics()
binaryColumnStatsData.setAvgColLen(22.2);
binaryColumnStatsData.setNumNulls(2);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BINARY_TYPE_NAME, binaryStats(binaryColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder()
.setMaxValueSizeInBytes(100)
Expand All @@ -480,7 +480,7 @@ public void testEmptyBinaryStatsToColumnStatistics()
{
BinaryColumnStatsData emptyBinaryColumnStatsData = new BinaryColumnStatsData();
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BINARY_TYPE_NAME, binaryStats(emptyBinaryColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual).isEqualTo(HiveColumnStatistics.builder().build());
}
Expand All @@ -492,7 +492,7 @@ public void testSingleDistinctValue()
doubleColumnStatsData.setNumNulls(10);
doubleColumnStatsData.setNumDVs(1);
ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(doubleColumnStatsData));
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
HiveColumnStatistics actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual.getNullsCount()).isEqualTo(OptionalLong.of(10));
assertThat(actual.getDistinctValuesWithNullCount()).isEqualTo(OptionalLong.of(1));
Expand All @@ -501,7 +501,7 @@ public void testSingleDistinctValue()
doubleColumnStatsData.setNumNulls(10);
doubleColumnStatsData.setNumDVs(1);
columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(doubleColumnStatsData));
actual = fromMetastoreApiColumnStatistics(columnStatisticsObj);
actual = fromMetastoreApiColumnStatistics("fake_db", "fake_tbl", columnStatisticsObj);

assertThat(actual.getNullsCount()).isEqualTo(OptionalLong.of(10));
assertThat(actual.getDistinctValuesWithNullCount()).isEqualTo(OptionalLong.of(1));
Expand Down