.../main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java

-Original file line number
+Diff line change
@@ Expand Up / @@ -77,6 +77,7 @@ @@
     import org.apache.avro.generic.IndexedRecord;
     import org.apache.hadoop.fs.FileSystem;
     import org.apache.hadoop.fs.Path;
+    import org.apache.hudi.table.marker.WriteMarkersFactory;
     import org.apache.log4j.LogManager;
     import org.apache.log4j.Logger;
     import org.apache.parquet.avro.AvroParquetReader;
@@ Expand Down Expand Up / @@ -144,6 +145,9 @@ public HoodieBootstrapWriteMetadata execute() { @@
           Option<HoodieWriteMetadata> metadataResult = metadataBootstrap(partitionSelections.get(BootstrapMode.METADATA_ONLY));
           // if there are full bootstrap to be performed, perform that too
           Option<HoodieWriteMetadata> fullBootstrapResult = fullBootstrap(partitionSelections.get(BootstrapMode.FULL_RECORD));
+          // Delete the marker directory for the instant
+          WriteMarkersFactory.get(config.getMarkersType(), table, instantTime)
+              .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
           return new HoodieBootstrapWriteMetadata(metadataResult, fullBootstrapResult);
         } catch (IOException ioe) {
           throw new HoodieIOException(ioe.getMessage(), ioe);
@@ Expand Down @@

...rce/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala

-Original file line number
+Diff line change
@@ Expand Up / @@ -17,7 +17,7 @@ @@
     package org.apache.hudi.functional
-    import org.apache.hadoop.fs.FileSystem
+    import org.apache.hadoop.fs.{FileSystem, Path}
     import org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider
     import org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector
     import org.apache.hudi.common.fs.FSUtils
@@ Expand All / @@ -31,9 +31,9 @@ import org.apache.spark.sql.{SaveMode, SparkSession} @@
     import org.junit.jupiter.api.Assertions.assertEquals
     import org.junit.jupiter.api.io.TempDir
     import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
     import java.time.Instant
     import java.util.Collections
     import scala.collection.JavaConverters._
     class TestDataSourceForBootstrap {
@@ Expand Down Expand Up / @@ -106,6 +106,8 @@ class TestDataSourceForBootstrap { @@
           DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL,
           extraOpts = Map(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.NonpartitionedKeyGenerator")
         )
+        // check marked directory clean up
+        assert(!fs.exists(new Path(basePath, ".hoodie/.temp/00000000000001")))
         // Read bootstrapped table and verify count
         var hoodieROViewDF1 = spark.read.format("hudi").load(basePath + "/*")
@@ Expand Down Expand Up / @@ -161,6 +163,9 @@ class TestDataSourceForBootstrap { @@
           Some("datestr"),
           Map(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key -> "true"))
+        // check marked directory clean up
+        assert(!fs.exists(new Path(basePath, ".hoodie/.temp/00000000000001")))
         // Read bootstrapped table and verify count
         val hoodieROViewDF1 = spark.read.format("hudi").load(basePath + "/*")
         assertEquals(numRecords, hoodieROViewDF1.count())
@@ Expand Down @@

[HUDI-3001] Clean up the marker directory when finish bootstrap operation #4298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

yihua merged 1 commit into apache:master from xiarixiaoyao:mark

Dec 16, 2021

-Original file line number
+Diff line change
@@ Expand Up / @@ -77,6 +77,7 @@ @@
     import org.apache.avro.generic.IndexedRecord;
     import org.apache.hadoop.fs.FileSystem;
     import org.apache.hadoop.fs.Path;
+    import org.apache.hudi.table.marker.WriteMarkersFactory;
     import org.apache.log4j.LogManager;
     import org.apache.log4j.Logger;
     import org.apache.parquet.avro.AvroParquetReader;
@@ Expand Down Expand Up / @@ -144,6 +145,9 @@ public HoodieBootstrapWriteMetadata execute() { @@
           Option<HoodieWriteMetadata> metadataResult = metadataBootstrap(partitionSelections.get(BootstrapMode.METADATA_ONLY));
           // if there are full bootstrap to be performed, perform that too
           Option<HoodieWriteMetadata> fullBootstrapResult = fullBootstrap(partitionSelections.get(BootstrapMode.FULL_RECORD));
+          // Delete the marker directory for the instant
+          WriteMarkersFactory.get(config.getMarkersType(), table, instantTime)
+              .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
           return new HoodieBootstrapWriteMetadata(metadataResult, fullBootstrapResult);
         } catch (IOException ioe) {
           throw new HoodieIOException(ioe.getMessage(), ioe);
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -17,7 +17,7 @@ @@
     package org.apache.hudi.functional
-    import org.apache.hadoop.fs.FileSystem
+    import org.apache.hadoop.fs.{FileSystem, Path}
     import org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider
     import org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector
     import org.apache.hudi.common.fs.FSUtils
@@ Expand All / @@ -31,9 +31,9 @@ import org.apache.spark.sql.{SaveMode, SparkSession} @@
     import org.junit.jupiter.api.Assertions.assertEquals
     import org.junit.jupiter.api.io.TempDir
     import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
     import java.time.Instant
     import java.util.Collections
     import scala.collection.JavaConverters._
     class TestDataSourceForBootstrap {
@@ Expand Down Expand Up / @@ -106,6 +106,8 @@ class TestDataSourceForBootstrap { @@
           DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL,
           extraOpts = Map(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.NonpartitionedKeyGenerator")
         )
+        // check marked directory clean up
+        assert(!fs.exists(new Path(basePath, ".hoodie/.temp/00000000000001")))
         // Read bootstrapped table and verify count
         var hoodieROViewDF1 = spark.read.format("hudi").load(basePath + "/*")
@@ Expand Down Expand Up / @@ -161,6 +163,9 @@ class TestDataSourceForBootstrap { @@
           Some("datestr"),
           Map(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key -> "true"))
+        // check marked directory clean up
+        assert(!fs.exists(new Path(basePath, ".hoodie/.temp/00000000000001")))
         // Read bootstrapped table and verify count
         val hoodieROViewDF1 = spark.read.format("hudi").load(basePath + "/*")
         assertEquals(numRecords, hoodieROViewDF1.count())
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[HUDI-3001] Clean up the marker directory when finish bootstrap operation #4298

Uh oh!

Diff view

Diff view

There are no files selected for viewing