Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import io.trino.operator.aggregation.MaxDataSizeForStats;
import io.trino.operator.aggregation.SumDataSizeForStats;
import io.trino.spi.TrinoException;
import io.trino.spi.expression.FunctionName;
import io.trino.spi.statistics.ColumnStatisticMetadata;
import io.trino.spi.statistics.ColumnStatisticType;
import io.trino.spi.statistics.TableStatisticType;
Expand All @@ -36,6 +37,7 @@
import java.util.Map;
import java.util.Optional;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static com.google.common.base.Verify.verifyNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
Expand All @@ -44,7 +46,6 @@
import static io.trino.spi.statistics.TableStatisticType.ROW_COUNT;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.BooleanType.BOOLEAN;
import static io.trino.spi.type.HyperLogLogType.HYPER_LOG_LOG;
import static io.trino.sql.analyzer.TypeSignatureProvider.fromTypes;
import static java.util.Objects.requireNonNull;

Expand Down Expand Up @@ -93,13 +94,23 @@ public TableStatisticAggregation createStatisticsAggregation(TableStatisticsMeta

for (ColumnStatisticMetadata columnStatisticMetadata : statisticsMetadata.getColumnStatistics()) {
String columnName = columnStatisticMetadata.getColumnName();
ColumnStatisticType statisticType = columnStatisticMetadata.getStatisticType();
Symbol inputSymbol = columnToSymbolMap.get(columnName);
verifyNotNull(inputSymbol, "inputSymbol is null");
Type inputType = symbolAllocator.getTypes().get(inputSymbol);
verifyNotNull(inputType, "inputType is null for symbol: %s", inputSymbol);
ColumnStatisticsAggregation aggregation = createColumnAggregation(statisticType, inputSymbol, inputType);
Symbol symbol = symbolAllocator.newSymbol(statisticType + ":" + columnName, aggregation.getOutputType());
ColumnStatisticsAggregation aggregation;
String symbolHint;
if (columnStatisticMetadata.getStatisticTypeIfPresent().isPresent()) {
ColumnStatisticType statisticType = columnStatisticMetadata.getStatisticType();
aggregation = createColumnAggregation(statisticType, inputSymbol, inputType);
symbolHint = statisticType + ":" + columnName;
}
else {
FunctionName aggregationName = columnStatisticMetadata.getAggregation();
aggregation = createColumnAggregation(aggregationName, inputSymbol, inputType);
symbolHint = aggregationName.getName() + ":" + columnName;
}
Symbol symbol = symbolAllocator.newSymbol(symbolHint, aggregation.getOutputType());
aggregations.put(symbol, aggregation.getAggregation());
descriptor.addColumnStatistic(columnStatisticMetadata, symbol);
}
Expand All @@ -111,20 +122,26 @@ public TableStatisticAggregation createStatisticsAggregation(TableStatisticsMeta
private ColumnStatisticsAggregation createColumnAggregation(ColumnStatisticType statisticType, Symbol input, Type inputType)
{
return switch (statisticType) {
case MIN_VALUE -> createAggregation(QualifiedName.of("min"), input.toSymbolReference(), inputType, inputType);
case MAX_VALUE -> createAggregation(QualifiedName.of("max"), input.toSymbolReference(), inputType, inputType);
case NUMBER_OF_DISTINCT_VALUES -> createAggregation(QualifiedName.of("approx_distinct"), input.toSymbolReference(), inputType, BIGINT);
case MIN_VALUE -> createAggregation(QualifiedName.of("min"), input.toSymbolReference(), inputType);
case MAX_VALUE -> createAggregation(QualifiedName.of("max"), input.toSymbolReference(), inputType);
case NUMBER_OF_DISTINCT_VALUES -> createAggregation(QualifiedName.of("approx_distinct"), input.toSymbolReference(), inputType);
case NUMBER_OF_DISTINCT_VALUES_SUMMARY ->
// we use $approx_set here and not approx_set because latter is not defined for all types supported by Trino
createAggregation(QualifiedName.of("$approx_set"), input.toSymbolReference(), inputType, HYPER_LOG_LOG);
case NUMBER_OF_NON_NULL_VALUES -> createAggregation(QualifiedName.of("count"), input.toSymbolReference(), inputType, BIGINT);
case NUMBER_OF_TRUE_VALUES -> createAggregation(QualifiedName.of("count_if"), input.toSymbolReference(), BOOLEAN, BIGINT);
case TOTAL_SIZE_IN_BYTES -> createAggregation(QualifiedName.of(SumDataSizeForStats.NAME), input.toSymbolReference(), inputType, BIGINT);
case MAX_VALUE_SIZE_IN_BYTES -> createAggregation(QualifiedName.of(MaxDataSizeForStats.NAME), input.toSymbolReference(), inputType, BIGINT);
createAggregation(QualifiedName.of("$approx_set"), input.toSymbolReference(), inputType);
case NUMBER_OF_NON_NULL_VALUES -> createAggregation(QualifiedName.of("count"), input.toSymbolReference(), inputType);
case NUMBER_OF_TRUE_VALUES -> createAggregation(QualifiedName.of("count_if"), input.toSymbolReference(), BOOLEAN);
case TOTAL_SIZE_IN_BYTES -> createAggregation(QualifiedName.of(SumDataSizeForStats.NAME), input.toSymbolReference(), inputType);
case MAX_VALUE_SIZE_IN_BYTES -> createAggregation(QualifiedName.of(MaxDataSizeForStats.NAME), input.toSymbolReference(), inputType);
};
}

private ColumnStatisticsAggregation createAggregation(QualifiedName functionName, SymbolReference input, Type inputType, Type outputType)
private ColumnStatisticsAggregation createColumnAggregation(FunctionName aggregation, Symbol input, Type inputType)
{
checkArgument(aggregation.getCatalogSchema().isEmpty(), "Catalog/schema name not supported");
return createAggregation(QualifiedName.of(aggregation.getName()), input.toSymbolReference(), inputType);
}

private ColumnStatisticsAggregation createAggregation(QualifiedName functionName, SymbolReference input, Type inputType)
{
ResolvedFunction resolvedFunction = metadata.resolveFunction(session, functionName, fromTypes(inputType));
Type resolvedType = getOnlyElement(resolvedFunction.getSignature().getArgumentTypes());
Expand All @@ -137,7 +154,7 @@ private ColumnStatisticsAggregation createAggregation(QualifiedName functionName
Optional.empty(),
Optional.empty(),
Optional.empty()),
outputType);
resolvedFunction.getSignature().getReturnType());
}

public static class TableStatisticAggregation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,19 @@
package io.trino.sql.planner.plan;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.KeyDeserializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import io.trino.spi.statistics.ColumnStatisticMetadata;
import io.trino.spi.statistics.ColumnStatisticType;
import io.trino.spi.statistics.TableStatisticType;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;

import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verifyNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.util.Objects.requireNonNull;

Expand All @@ -50,17 +41,30 @@ public static <T> StatisticAggregationsDescriptor<T> empty()
return StatisticAggregationsDescriptor.<T>builder().build();
}

@JsonCreator
public StatisticAggregationsDescriptor(
@JsonProperty("grouping") Map<String, T> grouping,
@JsonProperty("tableStatistics") Map<TableStatisticType, T> tableStatistics,
@JsonProperty("columnStatistics") Map<ColumnStatisticMetadata, T> columnStatistics)
Map<String, T> grouping,
Map<TableStatisticType, T> tableStatistics,
Map<ColumnStatisticMetadata, T> columnStatistics)
{
this.grouping = ImmutableMap.copyOf(requireNonNull(grouping, "grouping is null"));
this.tableStatistics = ImmutableMap.copyOf(requireNonNull(tableStatistics, "tableStatistics is null"));
this.columnStatistics = ImmutableMap.copyOf(requireNonNull(columnStatistics, "columnStatistics is null"));
}

@JsonCreator
@Deprecated // for JSON serialization only
public static <T> StatisticAggregationsDescriptor<T> fromJson(
@JsonProperty("grouping") Map<String, T> grouping,
@JsonProperty("tableStatistics") Map<TableStatisticType, T> tableStatistics,
@JsonProperty("columnStatisticsList") List<ColumnStatisticAggregationsDescriptor<T>> columnStatistics)
{
return new StatisticAggregationsDescriptor<>(
grouping,
tableStatistics,
columnStatistics.stream()
.collect(toImmutableMap(ColumnStatisticAggregationsDescriptor::metadata, ColumnStatisticAggregationsDescriptor::input)));
}

@JsonProperty
public Map<String, T> getGrouping()
{
Expand All @@ -73,14 +77,21 @@ public Map<TableStatisticType, T> getTableStatistics()
return tableStatistics;
}

@JsonProperty
@JsonSerialize(keyUsing = ColumnStatisticMetadataKeySerializer.class)
@JsonDeserialize(keyUsing = ColumnStatisticMetadataKeyDeserializer.class)
@JsonIgnore
public Map<ColumnStatisticMetadata, T> getColumnStatistics()
{
return columnStatistics;
}

@JsonProperty
@Deprecated // for JSON serialization only
public List<ColumnStatisticAggregationsDescriptor<T>> getColumnStatisticsList()
{
return columnStatistics.entrySet().stream()
.map(entry -> new ColumnStatisticAggregationsDescriptor<T>(entry.getKey(), entry.getValue()))
.collect(toImmutableList());
}

@Override
public boolean equals(Object o)
{
Expand Down Expand Up @@ -159,40 +170,5 @@ public StatisticAggregationsDescriptor<T> build()
}
}

public static class ColumnStatisticMetadataKeySerializer
extends JsonSerializer<ColumnStatisticMetadata>
{
@Override
public void serialize(ColumnStatisticMetadata value, JsonGenerator gen, SerializerProvider serializers)
throws IOException
{
gen.writeFieldName(serialize(verifyNotNull(value, "value is null")));
}

@VisibleForTesting
static String serialize(ColumnStatisticMetadata value)
{
return value.getStatisticType().name() + ":" + value.getColumnName();
}
}

public static class ColumnStatisticMetadataKeyDeserializer
extends KeyDeserializer
{
@Override
public ColumnStatisticMetadata deserializeKey(String key, DeserializationContext ctxt)
{
return deserialize(requireNonNull(key, "key is null"));
}

@VisibleForTesting
static ColumnStatisticMetadata deserialize(String value)
{
int separatorIndex = value.indexOf(':');
checkArgument(separatorIndex >= 0, "separator not found: %s", value);
String statisticType = value.substring(0, separatorIndex);
String column = value.substring(separatorIndex + 1);
return new ColumnStatisticMetadata(column, ColumnStatisticType.valueOf(statisticType));
}
}
public record ColumnStatisticAggregationsDescriptor<T>(ColumnStatisticMetadata metadata, T input) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import io.trino.metadata.Metadata;
import io.trino.metadata.TableHandle;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.expression.FunctionName;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.Range;
Expand Down Expand Up @@ -1458,9 +1459,22 @@ private void printStatisticAggregationsInfo(
}

for (Map.Entry<ColumnStatisticMetadata, Symbol> columnStatistic : columnStatistics.entrySet()) {
String aggregationName;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this functionality covered by any tests?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, there are no fine-grained tests for EXPLAIN.

if (columnStatistic.getKey().getStatisticTypeIfPresent().isPresent()) {
aggregationName = columnStatistic.getKey().getStatisticType().name();
}
else {
FunctionName aggregation = columnStatistic.getKey().getAggregation();
if (aggregation.getCatalogSchema().isPresent()) {
aggregationName = aggregation.getCatalogSchema().get() + "." + aggregation.getName();
}
else {
aggregationName = aggregation.getName();
}
}
nodeOutput.appendDetails(
indentString(1) + "%s[%s] => [%s := %s]",
columnStatistic.getKey().getStatisticType(),
aggregationName,
anonymizer.anonymizeColumn(columnStatistic.getKey().getColumnName()),
anonymizer.anonymize(columnStatistic.getValue()),
formatAggregation(anonymizer, aggregations.get(columnStatistic.getValue())));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.reflect.TypeToken;
import io.airlift.json.JsonCodec;
import io.trino.spi.expression.FunctionName;
import io.trino.spi.statistics.ColumnStatisticMetadata;
import io.trino.spi.statistics.ColumnStatisticType;
import io.trino.sql.planner.Symbol;
Expand All @@ -24,25 +25,12 @@

import static io.trino.spi.statistics.TableStatisticType.ROW_COUNT;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.sql.planner.plan.StatisticAggregationsDescriptor.ColumnStatisticMetadataKeyDeserializer.deserialize;
import static io.trino.sql.planner.plan.StatisticAggregationsDescriptor.ColumnStatisticMetadataKeySerializer.serialize;
import static io.trino.testing.assertions.Assert.assertEquals;

public class TestStatisticAggregationsDescriptor
{
private static final ImmutableList<String> COLUMNS = ImmutableList.of("", "col1", "$:###:;", "abc+dddd___");

@Test
public void testColumnStatisticMetadataKeySerializationRoundTrip()
{
for (String column : COLUMNS) {
for (ColumnStatisticType type : ColumnStatisticType.values()) {
ColumnStatisticMetadata expected = new ColumnStatisticMetadata(column, type);
assertEquals(deserialize(serialize(expected)), expected);
}
}
}

@Test
public void testSerializationRoundTrip()
{
Expand All @@ -64,6 +52,8 @@ private static StatisticAggregationsDescriptor<Symbol> createTestDescriptor()
for (ColumnStatisticType type : ColumnStatisticType.values()) {
builder.addColumnStatistic(new ColumnStatisticMetadata(column, type), testSymbol(symbolAllocator));
}
builder.addColumnStatistic(new ColumnStatisticMetadata(column, new FunctionName("count")), testSymbol(symbolAllocator));
builder.addColumnStatistic(new ColumnStatisticMetadata(column, new FunctionName("count_if")), testSymbol(symbolAllocator));
builder.addGrouping(column, testSymbol(symbolAllocator));
}
builder.addTableStatistic(ROW_COUNT, testSymbol(symbolAllocator));
Expand Down
28 changes: 28 additions & 0 deletions core/trino-spi/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,34 @@
<code>java.method.removed</code>
<old>method io.trino.spi.type.TypeSignature io.trino.spi.type.VarcharType::getParametrizedVarcharSignature(java.lang.String)</old>
</item>
<item>
<ignore>true</ignore>
<code>java.annotation.removed</code>
<old>parameter void io.trino.spi.statistics.ColumnStatisticMetadata::&lt;init&gt;(===java.lang.String===, io.trino.spi.statistics.ColumnStatisticType)</old>
<new>parameter void io.trino.spi.statistics.ColumnStatisticMetadata::&lt;init&gt;(===java.lang.String===, io.trino.spi.statistics.ColumnStatisticType)</new>
<annotation>@com.fasterxml.jackson.annotation.JsonProperty("columnName")</annotation>
</item>
<item>
<ignore>true</ignore>
<code>java.annotation.removed</code>
<old>parameter void io.trino.spi.statistics.ColumnStatisticMetadata::&lt;init&gt;(java.lang.String, ===io.trino.spi.statistics.ColumnStatisticType===)</old>
<new>parameter void io.trino.spi.statistics.ColumnStatisticMetadata::&lt;init&gt;(java.lang.String, ===io.trino.spi.statistics.ColumnStatisticType===)</new>
<annotation>@com.fasterxml.jackson.annotation.JsonProperty("statisticType")</annotation>
</item>
<item>
<ignore>true</ignore>
<code>java.annotation.removed</code>
<old>method void io.trino.spi.statistics.ColumnStatisticMetadata::&lt;init&gt;(java.lang.String, io.trino.spi.statistics.ColumnStatisticType)</old>
<new>method void io.trino.spi.statistics.ColumnStatisticMetadata::&lt;init&gt;(java.lang.String, io.trino.spi.statistics.ColumnStatisticType)</new>
<annotation>@com.fasterxml.jackson.annotation.JsonCreator</annotation>
</item>
<item>
<ignore>true</ignore>
<code>java.annotation.removed</code>
<old>method io.trino.spi.statistics.ColumnStatisticType io.trino.spi.statistics.ColumnStatisticMetadata::getStatisticType()</old>
<new>method io.trino.spi.statistics.ColumnStatisticType io.trino.spi.statistics.ColumnStatisticMetadata::getStatisticType()</new>
<annotation>@com.fasterxml.jackson.annotation.JsonProperty</annotation>
</item>
<item>
<code>java.method.visibilityReduced</code>
<old>method void io.trino.spi.block.DictionaryBlock::&lt;init&gt;(int, int, io.trino.spi.block.Block, int[], boolean, boolean, io.trino.spi.block.DictionaryId)</old>
Expand Down
Loading