apache · XuQianJin-Stars · Jan 31, 2023 · Jan 13, 2023 · Jan 17, 2023 · Jan 17, 2023
diff --git a/...in/java/org/apache/hudi/table/action/commit/FlinkDeletePartitionCommitActionExecutor.java b/...in/java/org/apache/hudi/table/action/commit/FlinkDeletePartitionCommitActionExecutor.java
@@ -27,6 +27,7 @@
 import org.apache.hudi.common.model.WriteOperationType;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
+import org.apache.hudi.common.table.view.SyncableFileSystemView;
 import org.apache.hudi.common.util.HoodieTimer;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.config.HoodieWriteConfig;
@@ -37,11 +38,13 @@
 import org.apache.hudi.table.action.HoodieWriteMetadata;
 
 import java.time.Duration;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED;
 import static org.apache.hudi.common.table.timeline.HoodieTimeline.REPLACE_COMMIT_ACTION;
@@ -62,6 +65,8 @@ public FlinkDeletePartitionCommitActionExecutor(HoodieEngineContext context,
 
   @Override
   public HoodieWriteMetadata<List<WriteStatus>> execute() {
+    checkPreconditions();
+
     try {
       HoodieTimer timer = new HoodieTimer().startTimer();
       context.setJobStatus(this.getClass().getSimpleName(), "Gather all file ids from all deleting partitions.");
@@ -98,4 +103,43 @@ private List<String> getAllExistingFileIds(String partitionPath) {
     // because new commit is not complete. it is safe to mark all existing file Ids as old files
     return table.getSliceView().getLatestFileSlices(partitionPath).map(FileSlice::getFileId).distinct().collect(Collectors.toList());
   }
+
+  /**
+   * Rejects the drop-partition if any pending table service action (requested or inflight) touches one of the
+   * partitions to be dropped.
+   * <p>
+   * Without this guard, a filegroup that is an input of a pending table service action could also be replaced by the
+   * drop-partition. Once both commits are completed, two commits would be replacing the same filegroup.
+   * <p>
+   * For example, a timeline might have an execution order as such:
+   * 000.replacecommit.requested (clustering filegroup_1 + filegroup_2 -> filegroup_3)
+   * 001.replacecommit.requested, 001.replacecommit.inflight, 001.replacecommit (drop_partition to replace filegroup_1)
+   * 000.replacecommit.inflight (clustering is executed now)
+   * 000.replacecommit (clustering completed)
+   * With this ordering, 000.replacecommit and 001.replacecommit both flag filegroup_1 to be replaced, which causes
+   * downstream duplicate key errors when a map is being constructed.
+   *
+   * @throws HoodieDeletePartitionException if a pending compaction, log compaction or clustering involves the partitions
+   */
+  private void checkPreconditions() {
+    SyncableFileSystemView pendingActionsView = (SyncableFileSystemView) table.getSliceView();
+    List<String> offendingInstants = new ArrayList<>();
+    // pending compaction and log compaction operations carry their instant time as the left element of the pair
+    Stream.concat(pendingActionsView.getPendingCompactionOperations(), pendingActionsView.getPendingLogCompactionOperations())
+        .filter(pendingOp -> partitions.contains(pendingOp.getRight().getPartitionPath()))
+        .map(Pair::getLeft)
+        .forEach(offendingInstants::add);
+
+    // filegroups in pending clustering carry the clustering instant as the right element of the pair
+    pendingActionsView.getFileGroupsInPendingClustering()
+        .filter(pendingFileGroup -> partitions.contains(pendingFileGroup.getLeft().getPartitionPath()))
+        .map(pendingFileGroup -> pendingFileGroup.getRight().getTimestamp())
+        .forEach(offendingInstants::add);
+    if (!offendingInstants.isEmpty()) {
+      List<String> distinctInstants = offendingInstants.stream().distinct().collect(Collectors.toList());
+      throw new HoodieDeletePartitionException("Failed to drop partitions. "
+          + "Please ensure that there are no pending table service actions (clustering/compaction) for the partitions to be deleted: " + partitions + ". "
+          + "Instant(s) of offending pending table service action: " + distinctInstants);
+    }
+  }
 }
diff --git a/...in/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java b/...in/java/org/apache/hudi/table/action/commit/SparkDeletePartitionCommitActionExecutor.java
@@ -25,6 +25,7 @@
 import org.apache.hudi.common.model.WriteOperationType;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
+import org.apache.hudi.common.table.view.SyncableFileSystemView;
 import org.apache.hudi.common.util.HoodieTimer;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.config.HoodieWriteConfig;
@@ -38,10 +39,13 @@
 import org.apache.hadoop.fs.Path;
 
 import java.time.Duration;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED;
 import static org.apache.hudi.common.table.timeline.HoodieTimeline.REPLACE_COMMIT_ACTION;
@@ -59,6 +63,7 @@ public SparkDeletePartitionCommitActionExecutor(HoodieEngineContext context,
 
   @Override
   public HoodieWriteMetadata<HoodieData<WriteStatus>> execute() {
+    checkPreconditions();
     try {
       HoodieTimer timer = HoodieTimer.start();
       context.setJobStatus(this.getClass().getSimpleName(), "Gather all file ids from all deleting partitions.");
@@ -90,4 +95,43 @@ public HoodieWriteMetadata<HoodieData<WriteStatus>> execute() {
       throw new HoodieDeletePartitionException("Failed to drop partitions for commit time " + instantTime, e);
     }
   }
+
+  /**
+   * Rejects the drop-partition if any pending table service action (requested or inflight) touches one of the
+   * partitions to be dropped.
+   * <p>
+   * Without this guard, a filegroup that is an input of a pending table service action could also be replaced by the
+   * drop-partition. Once both commits are completed, two commits would be replacing the same filegroup.
+   * <p>
+   * For example, a timeline might have an execution order as such:
+   * 000.replacecommit.requested (clustering filegroup_1 + filegroup_2 -> filegroup_3)
+   * 001.replacecommit.requested, 001.replacecommit.inflight, 001.replacecommit (drop_partition to replace filegroup_1)
+   * 000.replacecommit.inflight (clustering is executed now)
+   * 000.replacecommit (clustering completed)
+   * With this ordering, 000.replacecommit and 001.replacecommit both flag filegroup_1 to be replaced, which causes
+   * downstream duplicate key errors when a map is being constructed.
+   *
+   * @throws HoodieDeletePartitionException if a pending compaction, log compaction or clustering involves the partitions
+   */
+  private void checkPreconditions() {
+    SyncableFileSystemView pendingActionsView = (SyncableFileSystemView) table.getSliceView();
+    List<String> offendingInstants = new ArrayList<>();
+    // pending compaction and log compaction operations carry their instant time as the left element of the pair
+    Stream.concat(pendingActionsView.getPendingCompactionOperations(), pendingActionsView.getPendingLogCompactionOperations())
+        .filter(pendingOp -> partitions.contains(pendingOp.getRight().getPartitionPath()))
+        .map(Pair::getLeft)
+        .forEach(offendingInstants::add);
+
+    // filegroups in pending clustering carry the clustering instant as the right element of the pair
+    pendingActionsView.getFileGroupsInPendingClustering()
+        .filter(pendingFileGroup -> partitions.contains(pendingFileGroup.getLeft().getPartitionPath()))
+        .map(pendingFileGroup -> pendingFileGroup.getRight().getTimestamp())
+        .forEach(offendingInstants::add);
+    if (!offendingInstants.isEmpty()) {
+      List<String> distinctInstants = offendingInstants.stream().distinct().collect(Collectors.toList());
+      throw new HoodieDeletePartitionException("Failed to drop partitions. "
+          + "Please ensure that there are no pending table service actions (clustering/compaction) for the partitions to be deleted: " + partitions + ". "
+          + "Instant(s) of offending pending table service action: " + distinctInstants);
+    }
+  }
 }
diff --git a/...rce/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala b/...rce/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala
@@ -18,9 +18,11 @@
 package org.apache.spark.sql.hudi
 
 import org.apache.hudi.DataSourceWriteOptions._
-import org.apache.hudi.HoodieSparkUtils
+import org.apache.hudi.{HoodieCLIUtils, HoodieSparkUtils}
 import org.apache.hudi.common.model.HoodieCommitMetadata
 import org.apache.hudi.common.table.HoodieTableMetaClient
+import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstant}
+import org.apache.hudi.common.util.{Option => HOption}
 import org.apache.hudi.common.util.{PartitionPathEncodeUtils, StringUtils}
 import org.apache.hudi.config.HoodieWriteConfig
 import org.apache.hudi.keygen.{ComplexKeyGenerator, SimpleKeyGenerator}
@@ -396,4 +398,45 @@ class TestAlterTableDropPartition extends HoodieSparkSqlTestBase {
       }
     }
   }
+
+  test("Prevent a partition from being dropped if there are pending table service actions") {
+    withTempDir { tmp =>
+      Seq("cow").foreach { tableType =>
+        val tableName = generateTableName
+        val basePath = s"${tmp.getCanonicalPath}/$tableName"
+        spark.sql(
+          s"""
+             |create table $tableName (
+             |  id int,
+             |  name string,
+             |  price double,
+             |  ts long
+             |) using hudi
+             | options (
+             |  primaryKey ='id',
+             |  type = '$tableType',
+             |  preCombineField = 'ts'
+             | )
+             | partitioned by(ts)
+             | location '$basePath'
+       """.stripMargin)
+        spark.sql(s"insert into $tableName values(1, 'a1', 10, 1000)")
+        spark.sql(s"insert into $tableName values(2, 'a2', 10, 1001)")
+        spark.sql(s"insert into $tableName values(3, 'a3', 10, 1002)")
+        val client = HoodieCLIUtils.createHoodieClientFromPath(spark, basePath, Map.empty)
+        // Schedule (but do not execute) a clustering plan so that it stays pending on the timeline
+        val firstScheduleInstant = HoodieActiveTimeline.createNewInstantTime
+        client.scheduleClusteringAtInstant(firstScheduleInstant, HOption.empty())
+        // The scheduled plan must be listed by show_clustering in the REQUESTED state
+        checkAnswer(s"call show_clustering('$tableName')")(
+          Seq(firstScheduleInstant, 3, HoodieInstant.State.REQUESTED.name(), "*")
+        )
+        // Dropping a partition while the clustering plan is still pending must fail with the message below
+        val partition = "ts=1002"
+        val errMsg = s"Failed to drop partitions. Please ensure that there are no pending table service actions (clustering/compaction) for the partitions to be deleted: [$partition]"
+        checkExceptionContain(s"ALTER TABLE $tableName DROP PARTITION($partition)")(errMsg)
+      }
+    }
+  }
+
 }