diff --git a/spark/src/main/java/org/apache/iceberg/spark/actions/BaseSparkAction.java b/spark/src/main/java/org/apache/iceberg/spark/actions/BaseSparkAction.java index e760a6de4236..0c1151d5d214 100644 --- a/spark/src/main/java/org/apache/iceberg/spark/actions/BaseSparkAction.java +++ b/spark/src/main/java/org/apache/iceberg/spark/actions/BaseSparkAction.java @@ -159,7 +159,7 @@ protected Dataset buildValidDataFileDF(Table table) { .repartition(spark.sessionState().conf().numShufflePartitions()) // avoid adaptive execution combining tasks .as(Encoders.bean(ManifestFileBean.class)); - return allManifests.flatMap(new ReadManifest(ioBroadcast), Encoders.STRING()).toDF("file_path"); + return allManifests.flatMap(new ReadManifest(ioBroadcast), Encoders.STRING()).toDF("file_path").distinct(); } protected Dataset buildManifestFileDF(Table table) { @@ -173,7 +173,7 @@ protected Dataset buildManifestListDF(Table table) { protected Dataset buildOtherMetadataFileDF(TableOperations ops) { List otherMetadataFiles = getOtherMetadataFilePaths(ops); - return spark.createDataset(otherMetadataFiles, Encoders.STRING()).toDF("file_path"); + return spark.createDataset(otherMetadataFiles, Encoders.STRING()).toDF("file_path").distinct(); } protected Dataset buildValidMetadataFileDF(Table table, TableOperations ops) {