diff --git a/core/src/main/java/org/apache/iceberg/CatalogUtil.java b/core/src/main/java/org/apache/iceberg/CatalogUtil.java index 72b4813a1fac..2d16b19fc90d 100644 --- a/core/src/main/java/org/apache/iceberg/CatalogUtil.java +++ b/core/src/main/java/org/apache/iceberg/CatalogUtil.java @@ -117,6 +117,11 @@ public static void dropTableData(FileIO io, TableMetadata metadata) { Iterables.transform(metadata.previousFiles(), TableMetadata.MetadataLogEntry::file), "previous metadata", true); + deleteFiles( + io, + Iterables.transform(metadata.statisticsFiles(), StatisticsFile::path), + "statistics", + true); deleteFile(io, metadata.metadataFileLocation(), "metadata"); } diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestCatalogUtilDropTable.java b/core/src/test/java/org/apache/iceberg/hadoop/TestCatalogUtilDropTable.java index 478ac3a8c2b4..b8511bef5281 100644 --- a/core/src/test/java/org/apache/iceberg/hadoop/TestCatalogUtilDropTable.java +++ b/core/src/test/java/org/apache/iceberg/hadoop/TestCatalogUtilDropTable.java @@ -18,16 +18,27 @@ */ package org.apache.iceberg.hadoop; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Set; +import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.GenericBlobMetadata; +import org.apache.iceberg.GenericStatisticsFile; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.Snapshot; +import org.apache.iceberg.StatisticsFile; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableProperties; import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.puffin.Blob; +import org.apache.iceberg.puffin.Puffin; +import org.apache.iceberg.puffin.PuffinWriter; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; @@ -38,19 +49,28 @@ public class TestCatalogUtilDropTable extends HadoopTableTestBase { @Test - public void dropTableDataDeletesExpectedFiles() { + public void dropTableDataDeletesExpectedFiles() throws IOException { table.newFastAppend().appendFile(FILE_A).commit(); table.newAppend().appendFile(FILE_B).commit(); + StatisticsFile statisticsFile = + writeStatsFile( + table.currentSnapshot().snapshotId(), + table.currentSnapshot().sequenceNumber(), + tableLocation + "/metadata/" + UUID.randomUUID() + ".stats", + table.io()); + table.updateStatistics().setStatistics(statisticsFile.snapshotId(), statisticsFile).commit(); - TableMetadata tableMetadata = readMetadataVersion(3); + TableMetadata tableMetadata = readMetadataVersion(4); Set snapshotSet = Sets.newHashSet(table.snapshots()); Set manifestListLocations = manifestListLocations(snapshotSet); Set manifestLocations = manifestLocations(snapshotSet, table.io()); Set dataLocations = dataLocations(snapshotSet, table.io()); Set metadataLocations = metadataLocations(tableMetadata); + Set statsLocations = statsLocations(tableMetadata); Assertions.assertThat(manifestListLocations).as("should have 2 manifest lists").hasSize(2); - Assertions.assertThat(metadataLocations).as("should have 3 metadata locations").hasSize(3); + Assertions.assertThat(metadataLocations).as("should have 4 metadata locations").hasSize(4); + Assertions.assertThat(statsLocations).as("should have 1 stats file").hasSize(1); FileIO fileIO = Mockito.mock(FileIO.class); Mockito.when(fileIO.newInputFile(Mockito.anyString())) @@ -69,7 +89,8 @@ public void dropTableDataDeletesExpectedFiles() { manifestListLocations.size() + manifestLocations.size() + dataLocations.size() - + metadataLocations.size())) + + metadataLocations.size() + + statsLocations.size())) .deleteFile(argumentCaptor.capture()); List deletedPaths = argumentCaptor.getAllValues(); @@ -85,6 +106,9 @@ public void dropTableDataDeletesExpectedFiles() { Assertions.assertThat(deletedPaths) .as("should contain all created metadata locations") .containsAll(metadataLocations); + Assertions.assertThat(deletedPaths) + .as("should contain all created statistic") + .containsAll(statsLocations); } @Test @@ -181,4 +205,34 @@ private Set metadataLocations(TableMetadata tableMetadata) { metadataLocations.add(tableMetadata.metadataFileLocation()); return metadataLocations; } + + private Set statsLocations(TableMetadata tableMetadata) { + return tableMetadata.statisticsFiles().stream() + .map(StatisticsFile::path) + .collect(Collectors.toSet()); + } + + private StatisticsFile writeStatsFile( + long snapshotId, long snapshotSequenceNumber, String statsLocation, FileIO fileIO) + throws IOException { + try (PuffinWriter puffinWriter = Puffin.write(fileIO.newOutputFile(statsLocation)).build()) { + puffinWriter.add( + new Blob( + "some-blob-type", + ImmutableList.of(1), + snapshotId, + snapshotSequenceNumber, + ByteBuffer.wrap("blob content".getBytes(StandardCharsets.UTF_8)))); + puffinWriter.finish(); + + return new GenericStatisticsFile( + snapshotId, + statsLocation, + puffinWriter.fileSize(), + puffinWriter.footerSize(), + puffinWriter.writtenBlobsMetadata().stream() + .map(GenericBlobMetadata::from) + .collect(ImmutableList.toImmutableList())); + } + } }