Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions core/src/main/java/org/apache/iceberg/CatalogUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ public static void dropTableData(FileIO io, TableMetadata metadata) {
Iterables.transform(metadata.previousFiles(), TableMetadata.MetadataLogEntry::file),
"previous metadata",
true);
deleteFiles(
io,
Iterables.transform(metadata.statisticsFiles(), StatisticsFile::path),
"statistics",
true);
deleteFile(io, metadata.metadataFileLocation(), "metadata");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,27 @@
*/
package org.apache.iceberg.hadoop;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.GenericBlobMetadata;
import org.apache.iceberg.GenericStatisticsFile;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.StatisticsFile;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.puffin.Blob;
import org.apache.iceberg.puffin.Puffin;
import org.apache.iceberg.puffin.PuffinWriter;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;
Expand All @@ -38,19 +49,28 @@
public class TestCatalogUtilDropTable extends HadoopTableTestBase {

@Test
public void dropTableDataDeletesExpectedFiles() {
public void dropTableDataDeletesExpectedFiles() throws IOException {
table.newFastAppend().appendFile(FILE_A).commit();
table.newAppend().appendFile(FILE_B).commit();
StatisticsFile statisticsFile =
writeStatsFile(
table.currentSnapshot().snapshotId(),
table.currentSnapshot().sequenceNumber(),
tableLocation + "/metadata/" + UUID.randomUUID() + ".stats",
table.io());
table.updateStatistics().setStatistics(statisticsFile.snapshotId(), statisticsFile).commit();

TableMetadata tableMetadata = readMetadataVersion(3);
TableMetadata tableMetadata = readMetadataVersion(4);
Set<Snapshot> snapshotSet = Sets.newHashSet(table.snapshots());

Set<String> manifestListLocations = manifestListLocations(snapshotSet);
Set<String> manifestLocations = manifestLocations(snapshotSet, table.io());
Set<String> dataLocations = dataLocations(snapshotSet, table.io());
Set<String> metadataLocations = metadataLocations(tableMetadata);
Set<String> statsLocations = statsLocations(tableMetadata);
Assertions.assertThat(manifestListLocations).as("should have 2 manifest lists").hasSize(2);
Assertions.assertThat(metadataLocations).as("should have 3 metadata locations").hasSize(3);
Assertions.assertThat(metadataLocations).as("should have 4 metadata locations").hasSize(4);
Assertions.assertThat(statsLocations).as("should have 1 stats file").hasSize(1);

FileIO fileIO = Mockito.mock(FileIO.class);
Mockito.when(fileIO.newInputFile(Mockito.anyString()))
Expand All @@ -69,7 +89,8 @@ public void dropTableDataDeletesExpectedFiles() {
manifestListLocations.size()
+ manifestLocations.size()
+ dataLocations.size()
+ metadataLocations.size()))
+ metadataLocations.size()
+ statsLocations.size()))
.deleteFile(argumentCaptor.capture());

List<String> deletedPaths = argumentCaptor.getAllValues();
Expand All @@ -85,6 +106,9 @@ public void dropTableDataDeletesExpectedFiles() {
Assertions.assertThat(deletedPaths)
.as("should contain all created metadata locations")
.containsAll(metadataLocations);
Assertions.assertThat(deletedPaths)
.as("should contain all created statistic")
.containsAll(statsLocations);
}

@Test
Expand Down Expand Up @@ -181,4 +205,34 @@ private Set<String> metadataLocations(TableMetadata tableMetadata) {
metadataLocations.add(tableMetadata.metadataFileLocation());
return metadataLocations;
}

private Set<String> statsLocations(TableMetadata tableMetadata) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: these two new private methods can be static

return tableMetadata.statisticsFiles().stream()
.map(StatisticsFile::path)
.collect(Collectors.toSet());
}

private StatisticsFile writeStatsFile(
long snapshotId, long snapshotSequenceNumber, String statsLocation, FileIO fileIO)
throws IOException {
try (PuffinWriter puffinWriter = Puffin.write(fileIO.newOutputFile(statsLocation)).build()) {
puffinWriter.add(
new Blob(
"some-blob-type",
ImmutableList.of(1),
snapshotId,
snapshotSequenceNumber,
ByteBuffer.wrap("blob content".getBytes(StandardCharsets.UTF_8))));
puffinWriter.finish();

return new GenericStatisticsFile(
snapshotId,
statsLocation,
puffinWriter.fileSize(),
puffinWriter.footerSize(),
puffinWriter.writtenBlobsMetadata().stream()
.map(GenericBlobMetadata::from)
.collect(ImmutableList.toImmutableList()));
}
}
}