diff --git a/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java b/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java index a5bcc2f8b161..c03d7f15fc02 100644 --- a/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java +++ b/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java @@ -23,10 +23,9 @@ import java.nio.ByteBuffer; import java.sql.Timestamp; import java.time.temporal.ChronoUnit; -import java.util.List; +import java.util.Collections; import java.util.Map; import java.util.Optional; -import java.util.Queue; import java.util.Set; import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; @@ -35,10 +34,7 @@ import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.hadoop.HadoopInputFile; import org.apache.iceberg.io.InputFile; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.relocated.com.google.common.collect.Queues; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Type; @@ -86,7 +82,7 @@ static Metrics fromWriter(Writer writer) { private static Metrics buildOrcMetrics(final long numOfRows, final TypeDescription orcSchema, final ColumnStatistics[] colStats) { final Schema schema = ORCSchemaUtil.convert(orcSchema); - final Set columnsInContainers = findColumnsInContainers(schema, orcSchema); + final Set columnsInContainers = findColumnsInContainers(orcSchema); Map columnSizes = Maps.newHashMapWithExpectedSize(colStats.length); Map valueCounts = Maps.newHashMapWithExpectedSize(colStats.length); Map nullCounts = Maps.newHashMapWithExpectedSize(colStats.length); @@ -214,64 +210,33 @@ private static Optional fromOrcMax(Types.NestedField column, return Optional.ofNullable(Conversions.toByteBuffer(column.type(), max)); } - private static Set findColumnsInContainers(Schema schema, - TypeDescription orcSchema) { - ColumnsInContainersVisitor visitor = new ColumnsInContainersVisitor(); - OrcSchemaWithTypeVisitor.visit(schema, orcSchema, visitor); - return visitor.getColumnsInContainers(); + private static Set findColumnsInContainers(TypeDescription orcSchema) { + Set columnsInContainers = Sets.newHashSet(); + findColumnsInContainers(orcSchema, columnsInContainers, false); + return Collections.unmodifiableSet(columnsInContainers); } - private static class ColumnsInContainersVisitor extends OrcSchemaWithTypeVisitor { - - private final Set columnsInContainers; - - private ColumnsInContainersVisitor() { - columnsInContainers = Sets.newHashSet(); - } - - public Set getColumnsInContainers() { - return columnsInContainers; - } - - private Set flatten(TypeDescription rootType) { - if (rootType == null) { - return ImmutableSet.of(); - } - - final Set flatTypes = Sets.newHashSetWithExpectedSize(rootType.getMaximumId()); - final Queue queue = Queues.newLinkedBlockingQueue(); - queue.add(rootType); - while (!queue.isEmpty()) { - TypeDescription type = queue.remove(); - flatTypes.add(type); - queue.addAll(Optional.ofNullable(type.getChildren()).orElse(ImmutableList.of())); - } - return flatTypes; + private static void findColumnsInContainers(TypeDescription column, + Set columnsInContainers, + boolean isInContainers) { + if (isInContainers) { + columnsInContainers.add(column); } - - @Override - public TypeDescription record(Types.StructType iStruct, TypeDescription record, - List names, List fields) { - return record; - } - - @Override - public TypeDescription list(Types.ListType iList, TypeDescription array, TypeDescription element) { - columnsInContainers.addAll(flatten(element)); - return array; - } - - @Override - public TypeDescription map(Types.MapType iMap, TypeDescription map, - TypeDescription key, TypeDescription value) { - columnsInContainers.addAll(flatten(key)); - columnsInContainers.addAll(flatten(value)); - return map; - } - - @Override - public TypeDescription primitive(Type.PrimitiveType iPrimitive, TypeDescription primitive) { - return primitive; + switch (column.getCategory()) { + case LIST: + case MAP: + for (TypeDescription child : column.getChildren()) { + findColumnsInContainers(child, columnsInContainers, true); + } + return; + case STRUCT: + for (TypeDescription child : column.getChildren()) { + findColumnsInContainers(child, columnsInContainers, isInContainers); + } + return; + case UNION: + throw new UnsupportedOperationException("Cannot handle " + column); + default: } } }