diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetVariantUtil.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetVariantUtil.java index 3f02b5183b80..ac418a1127bd 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetVariantUtil.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetVariantUtil.java @@ -25,11 +25,8 @@ import java.util.Comparator; import java.util.Deque; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.Optional; -import java.util.function.Function; -import java.util.stream.Collectors; import org.apache.iceberg.expressions.PathUtil; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -403,19 +400,14 @@ public Type array(VariantArray array, List elementResults) { return null; } - // Choose most common type as shredding type and build 3-level list - Type defaultTYpe = elementResults.get(0); - Type shredType = - elementResults.stream() - .filter(Objects::nonNull) - .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())) - .entrySet() - .stream() - .max(Map.Entry.comparingByValue()) - .map(Map.Entry::getKey) - .orElse(defaultTYpe); - - return list(shredType); + // Shred if all the elements are of a uniform type and build 3-level list + Type shredType = elementResults.get(0); + if (shredType != null + && elementResults.stream().allMatch(type -> Objects.equals(type, shredType))) { + return list(shredType); + } + + return null; } private static GroupType list(Type shreddedType) { diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java index 36ca2cf477ed..f3d58ab00e4d 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestVariantWriters.java @@ -103,7 +103,7 @@ public class TestVariantWriters { private static final ByteBuffer NESTED_ARRAY_BUFFER = VariantTestUtil.createArray( array(Variants.of("string"), Variants.of("iceberg")), - array(Variants.of("string"), Variants.of("iceberg"))); + array(Variants.of("apple"), Variants.of("banana"))); private static final ByteBuffer MIXED_NESTED_ARRAY_BUFFER = VariantTestUtil.createArray( array(Variants.of("string"), Variants.of("iceberg"), Variants.of(34)),