diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java index 17fecdeccf0fe..682c2daa1f68e 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java @@ -54,12 +54,12 @@ /** * Performs bootstrap from a non-hudi source. */ -public class BootstrapExecutor implements Serializable { +public class BootstrapExecutor implements Serializable { private static final Logger LOG = LogManager.getLogger(BootstrapExecutor.class); /** - * Config. + * Config. */ private final HoodieDeltaStreamer.Config cfg; @@ -97,9 +97,10 @@ public class BootstrapExecutor implements Serializable { /** * Bootstrap Executor. - * @param cfg DeltaStreamer Config - * @param jssc Java Spark Context - * @param fs File System + * + * @param cfg DeltaStreamer Config + * @param jssc Java Spark Context + * @param fs File System * @param properties Bootstrap Writer Properties * @throws IOException */ @@ -168,9 +169,15 @@ private void syncHive() { } private void initializeTable() throws IOException { - if (fs.exists(new Path(cfg.targetBasePath))) { - throw new HoodieException("target base path already exists at " + cfg.targetBasePath - + ". Cannot bootstrap data on top of an existing table"); + Path basePath = new Path(cfg.targetBasePath); + if (fs.exists(basePath)) { + if (cfg.bootstrapOverwrite) { + LOG.warn("Target base path already exists, overwrite it"); + fs.delete(basePath, true); + } else { + throw new HoodieException("target base path already exists at " + cfg.targetBasePath + + ". Cannot bootstrap data on top of an existing table"); + } } HoodieTableMetaClient.withPropertyBuilder() .setTableType(cfg.tableType) diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java index 2522a605c3e71..3ceb0028751a2 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java @@ -363,6 +363,9 @@ public static class Config implements Serializable { @Parameter(names = {"--run-bootstrap"}, description = "Run bootstrap if bootstrap index is not found") public Boolean runBootstrap = false; + @Parameter(names = {"--bootstrap-overwrite"}, description = "Overwrite existing target table, default false") + public Boolean bootstrapOverwrite = false; + @Parameter(names = {"--bootstrap-index-class"}, description = "subclass of BootstrapIndex") public String bootstrapIndexClass = HFileBootstrapIndex.class.getName();