diff --git a/core/src/main/java/org/apache/iceberg/MetricsUtil.java b/core/src/main/java/org/apache/iceberg/MetricsUtil.java
index aa33873d4986..a741096666f9 100644
--- a/core/src/main/java/org/apache/iceberg/MetricsUtil.java
+++ b/core/src/main/java/org/apache/iceberg/MetricsUtil.java
@@ -172,21 +172,31 @@ public static MetricsModes.MetricsMode metricsMode(
               "Lower bound",
               DataFile.LOWER_BOUNDS,
               Types.NestedField::type,
-              (file, field) ->
-                  file.lowerBounds() == null
-                      ? null
-                      : Conversions.fromByteBuffer(
-                          field.type(), file.lowerBounds().get(field.fieldId()))),
+              (file, field) -> {
+                if (file.lowerBounds() == null) {
+                  return null;
+                }
+                Object value =
+                    Conversions.fromByteBuffer(
+                        field.type(), file.lowerBounds().get(field.fieldId()));
+                // a UUID bound is returned as its string form; other values pass through as-is
+                return (value instanceof java.util.UUID) ? value.toString() : value;
+              }),
           new ReadableMetricColDefinition(
               "upper_bound",
               "Upper bound",
               DataFile.UPPER_BOUNDS,
               Types.NestedField::type,
-              (file, field) ->
-                  file.upperBounds() == null
-                      ? null
-                      : Conversions.fromByteBuffer(
-                          field.type(), file.upperBounds().get(field.fieldId()))));
+              (file, field) -> {
+                if (file.upperBounds() == null) {
+                  return null;
+                }
+                Object value =
+                    Conversions.fromByteBuffer(
+                        field.type(), file.upperBounds().get(field.fieldId()));
+                // a UUID bound is returned as its string form; other values pass through as-is
+                return (value instanceof java.util.UUID) ? value.toString() : value;
+              }));
 
   public static final String READABLE_METRICS = "readable_metrics";
 
diff --git a/core/src/test/java/org/apache/iceberg/TestMetrics.java b/core/src/test/java/org/apache/iceberg/TestMetrics.java
index 4da8c480ea6e..cb2a0332cb16 100644
--- a/core/src/test/java/org/apache/iceberg/TestMetrics.java
+++ b/core/src/test/java/org/apache/iceberg/TestMetrics.java
@@ -34,6 +34,9 @@
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.UUID;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import org.apache.iceberg.data.GenericRecord;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.io.InputFile;
@@ -494,6 +497,60 @@ public void testMetricsForTopLevelWithMultipleRowGroup() throws Exception {
         6, Types.DecimalType.of(10, 2), new BigDecimal("2.00"), new BigDecimal("201.00"), metrics);
   }
 
+  // Checks that readable metrics report UUID lower/upper bounds as strings.
+  @TestTemplate
+  public void testMetricsForUUIDField() throws IOException {
+    assumeThat(fileFormat())
+        .as("ORC writer does not write UUID bounds, skip for ORC")
+        .isNotEqualTo(FileFormat.ORC);
+
+    // prepare schema with UUID
+    Schema uuidSchema = new Schema(required(1, "uuidCol", Types.UUIDType.get()));
+    PartitionSpec spec = PartitionSpec.unpartitioned();
+
+    UUID uuid1 = UUID.randomUUID();
+    UUID uuid2 = UUID.randomUUID();
+    UUID uuid3 = UUID.randomUUID();
+
+    Record rec1 = GenericRecord.create(uuidSchema);
+    rec1.setField("uuidCol", uuid1);
+    Record rec2 = GenericRecord.create(uuidSchema);
+    rec2.setField("uuidCol", uuid2);
+    Record rec3 = GenericRecord.create(uuidSchema);
+    rec3.setField("uuidCol", uuid3);
+
+    Metrics metrics = getMetrics(uuidSchema, rec1, rec2, rec3);
+
+    DataFile file =
+        DataFiles.builder(spec)
+            .withPath("/tmp/data-" + UUID.randomUUID() + ".parquet")
+            .withFileSizeInBytes(128)
+            .withRecordCount(metrics.recordCount())
+            .withMetrics(metrics)
+            .build();
+
+    Schema wrapperSchema = MetricsUtil.readableMetricsSchema(uuidSchema, new Schema());
+
+    Types.StructType projected =
+        wrapperSchema.findField(MetricsUtil.READABLE_METRICS).type().asStructType();
+
+    MetricsUtil.ReadableMetricsStruct readable =
+        MetricsUtil.readableMetricsStruct(uuidSchema, file, projected);
+
+    StructLike colMetrics = readable.get(0, StructLike.class);
+    // lower_bound
+    String lower = colMetrics.get(4, String.class);
+    // upper_bound
+    String upper = colMetrics.get(5, String.class);
+
+    // the expected values assume the bounds follow the string ordering of the UUIDs
+    List<String> sorted =
+        Stream.of(uuid1, uuid2, uuid3).map(UUID::toString).sorted().collect(Collectors.toList());
+
+    assertThat(lower).isEqualTo(sorted.get(0));
+    assertThat(upper).isEqualTo(sorted.get(2));
+  }
+
   @TestTemplate
   public void testMetricsForNestedStructFieldsWithMultipleRowGroup() throws IOException {
     assumeThat(supportsSmallRowGroups())