Dedup files list generated in BaseSparkAction #2452
```diff
@@ -159,7 +159,7 @@ protected Dataset<Row> buildValidDataFileDF(Table table) {
         .repartition(spark.sessionState().conf().numShufflePartitions()) // avoid adaptive execution combining tasks
         .as(Encoders.bean(ManifestFileBean.class));

-    return allManifests.flatMap(new ReadManifest(ioBroadcast), Encoders.STRING()).toDF("file_path");
+    return allManifests.flatMap(new ReadManifest(ioBroadcast), Encoders.STRING()).toDF("file_path").distinct();
   }

   protected Dataset<Row> buildManifestFileDF(Table table) {
@@ -173,7 +173,7 @@ protected Dataset<Row> buildManifestListDF(Table table) {

   protected Dataset<Row> buildOtherMetadataFileDF(TableOperations ops) {
     List<String> otherMetadataFiles = getOtherMetadataFilePaths(ops);
-    return spark.createDataset(otherMetadataFiles, Encoders.STRING()).toDF("file_path");
+    return spark.createDataset(otherMetadataFiles, Encoders.STRING()).toDF("file_path").distinct();
```
**Contributor (@aokolnychyi):** Instead of doing a shuffle here, I think we should refine the approach we use to build the list of JSON files. I think what happens now is that we take the previous 100 version files from every version file and add them to the list, even though each new version file has only one different entry. Will tables with 2000 snapshots and 100 previous metadata files produce a list with 200,000 elements?

**Contributor (Author):** @aokolnychyi This current code does not do a recursive listing of all the version metadata JSON files.
```diff
   }

   protected Dataset<Row> buildValidMetadataFileDF(Table table, TableOperations ops) {
```
Deduplicating in this place probably makes sense to me. We can consider exposing an argument to make the dedup step optional (I am not sure it is a good idea, but I want to think this through together). The dedup step we are adding is going to trigger a shuffle. Technically, we are fine in the existing expire snapshots action as it does the dedup in `except`. The question is what kind of performance impact deduplicating here will have. We only have duplicates if multiple manifests reference the same files. In the `rewrite_manifests` procedure, we rewrite all manifests, meaning we produce a snapshot with new manifests whose entries are old data files. Also, there are updates and deletes that may rewrite manifests. I think deduplicating here makes sense in all cases.

Thoughts, @rdblue @flyrain @RussellSpitzer @karuppayya?
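For reference, here is a minimal sketch (not the actual action code; the class, method, and parameter names are illustrative) of why `except` already gives the expire-snapshots path a deduplicated result: Spark's `except` is a distinct set difference, so its output contains no duplicates even when both inputs do.

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

class ExceptDedupSketch {
  // Hypothetical helper: except() behaves like EXCEPT DISTINCT in SQL, so the
  // files-to-delete result is already duplicate-free without any explicit
  // distinct()/dropDuplicates() call upstream.
  static Dataset<Row> filesToDelete(Dataset<Row> filesBeforeExpiration,
                                    Dataset<Row> filesAfterExpiration) {
    return filesBeforeExpiration.except(filesAfterExpiration);
  }
}
```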
I left my thoughts on the other thread, but I think deduping shouldn't happen here, because we would end up with multiple shuffle stages in expire snapshots instead of just the one. An API that wants distinct results can always call `distinct()` on the return value.
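One way to see the extra shuffle stages being discussed is to compare physical plans. This is a hedged sketch (the class and method are made up for illustration); each additional Exchange operator in the output corresponds to an additional shuffle:

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

class ShuffleCountSketch {
  // Print both physical plans and count the Exchange operators: the
  // builder-side distinct() adds one on top of the shuffle that the
  // downstream except()/join in the action already needs.
  static void comparePlans(Dataset<Row> validFileDF) {
    validFileDF.explain();             // plan as built without the dedup
    validFileDF.distinct().explain();  // same plan plus an extra Exchange
  }
}
```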
@aokolnychyi When building the data files, we do a `dropDuplicates`, which takes care of the deduping currently. The duplicates come from collecting manifest files: I think different snapshots reference the same manifest files, and hence we get duplicates for the manifests. We could do a `dropDuplicates`/`distinct` while collecting manifest files in `BaseSparkAction`, but like @RussellSpitzer suggested, this would affect all actions with an additional shuffle. We could leave it to the caller to decide the behaviour.
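A sketch of the "dedup while collecting manifest files" option, reusing the identifiers from the diff above (`allManifests`, `ReadManifest`, `ioBroadcast`) and assuming `ManifestFileBean` exposes a `path` column; this is only an illustration of the idea, not the proposed patch:

```java
// Drop duplicate manifest entries by path before the flatMap so each manifest
// file is opened and read only once; the cost is a shuffle over the relatively
// small manifest list rather than over every data file path it produces.
Dataset<Row> dataFiles = allManifests
    .dropDuplicates("path")
    .flatMap(new ReadManifest(ioBroadcast), Encoders.STRING())
    .toDF("file_path");
```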
What's the downside of an additional shuffle? It should be OK here if its overhead is acceptable.
After thinking more about this, I think I agree with @RussellSpitzer. Even if, say, we loaded 4 million files of which 1 million are unique and 40,000 manifests of which only 1,000 are unique, doing two extra shuffles to find the unique data files and manifests is probably going to be more expensive than doing one larger shuffle.

It is important that we dedup manifests before reading valid data files (which the existing code does), so we should probably handle this on the caller side. Make sure we do a projection before deduplicating.

That being said, I think we should refine the logic in `getOtherMetadataFilePaths`.
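A sketch of the "projection before deduplicating" point for a caller-side dedup, assuming the caller starts from a DF that carries more columns than it needs (the class and column names are illustrative):

```java
import static org.apache.spark.sql.functions.col;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

class CallerSideDedupSketch {
  // Hypothetical caller-side dedup: select only the file_path column before
  // distinct(), so the shuffle moves a single string column instead of the
  // full rows that were loaded.
  static Dataset<Row> uniqueFilePaths(Dataset<Row> fileDF) {
    return fileDF.select(col("file_path")).distinct();
  }
}
```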
@karuppayya, could you check if my assumption here is valid?
Yes, the version metadata files will require deduping.
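Given that confirmation, one possible shape for the `getOtherMetadataFilePaths` refinement is to dedup the driver-side list before the Dataset is created, which removes the duplicates without adding any Spark shuffle. This is a hedged sketch of an alternative method body, not the agreed-upon fix:

```java
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;

// Hypothetical alternative body for buildOtherMetadataFileDF: the version
// metadata paths are already materialized as a List<String> on the driver,
// so a LinkedHashSet removes duplicates (preserving order) before the Dataset
// is built, and no distinct() shuffle is needed for this DF at all.
protected Dataset<Row> buildOtherMetadataFileDF(TableOperations ops) {
  List<String> otherMetadataFiles =
      new ArrayList<>(new LinkedHashSet<>(getOtherMetadataFilePaths(ops)));
  return spark.createDataset(otherMetadataFiles, Encoders.STRING()).toDF("file_path");
}
```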