diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java index 13fa91f479c2d..da661ff245461 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java @@ -36,6 +36,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieSavepointException; import org.apache.hudi.index.HoodieIndex; +import org.apache.hudi.keygen.constant.KeyGeneratorType; import org.apache.hudi.table.action.compact.strategy.UnBoundedCompactionStrategy; import org.apache.hudi.table.upgrade.SparkUpgradeDowngrade; import org.apache.hudi.utilities.HDFSParquetImporter; @@ -58,6 +59,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Locale; /** * This class deals with initializing spark context based on command entered to hudi-cli. @@ -353,14 +355,20 @@ private static int deduplicatePartitionPath(JavaSparkContext jsc, String duplica private static int doBootstrap(JavaSparkContext jsc, String tableName, String tableType, String basePath, String sourcePath, String recordKeyCols, String partitionFields, String parallelism, String schemaProviderClass, - String bootstrapIndexClass, String selectorClass, String keyGeneratorClass, String fullBootstrapInputProvider, + String bootstrapIndexClass, String selectorClass, String keyGenerator, String fullBootstrapInputProvider, String payloadClassName, String enableHiveSync, String propsFilePath, List configs) throws IOException { TypedProperties properties = propsFilePath == null ? 
UtilHelpers.buildProperties(configs) : UtilHelpers.readConfig(FSUtils.getFs(propsFilePath, jsc.hadoopConfiguration()), new Path(propsFilePath), configs).getConfig(); properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_BASE_PATH_PROP.key(), sourcePath); - properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS.key(), keyGeneratorClass); + + if (!StringUtils.isNullOrEmpty(keyGenerator) && KeyGeneratorType.getNames().contains(keyGenerator.toUpperCase(Locale.ROOT))) { + properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_TYPE.key(), keyGenerator.toUpperCase(Locale.ROOT)); + } else { + properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS.key(), keyGenerator); + } + properties.setProperty(HoodieBootstrapConfig.FULL_BOOTSTRAP_INPUT_PROVIDER.key(), fullBootstrapInputProvider); properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_PARALLELISM.key(), parallelism); properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_MODE_SELECTOR.key(), selectorClass); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java index 777715fda7fc6..9252b92835946 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.keygen.constant.KeyGeneratorType; import java.io.File; import java.io.FileReader; @@ -60,6 +61,12 @@ public class HoodieBootstrapConfig extends HoodieConfig { .sinceVersion("0.6.0") .withDocumentation("Key generator implementation to be used for generating keys from the bootstrapped dataset"); + public static final ConfigProperty 
BOOTSTRAP_KEYGEN_TYPE = ConfigProperty + .key("hoodie.bootstrap.keygen.type") + .defaultValue(KeyGeneratorType.SIMPLE.name()) + .sinceVersion("0.9.0") + .withDocumentation("Type of built-in key generator, currently supports SIMPLE, COMPLEX, TIMESTAMP, CUSTOM, NON_PARTITION, GLOBAL_DELETE"); + public static final ConfigProperty BOOTSTRAP_PARTITION_PATH_TRANSLATOR_CLASS = ConfigProperty .key("hoodie.bootstrap.partitionpath.translator.class") .defaultValue(IdentityBootstrapPartitionPathTranslator.class.getName()) @@ -131,6 +138,11 @@ public Builder withBootstrapKeyGenClass(String keyGenClass) { return this; } + public Builder withBootstrapKeyGenType(String keyGenType) { + bootstrapConfig.setValue(BOOTSTRAP_KEYGEN_TYPE, keyGenType); + return this; + } + public Builder withBootstrapPartitionPathTranslatorClass(String partitionPathTranslatorClass) { bootstrapConfig .setValue(BOOTSTRAP_PARTITION_PATH_TRANSLATOR_CLASS, partitionPathTranslatorClass); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 1783535e82f53..5946f9c1646e9 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -1126,6 +1126,10 @@ public String getBootstrapKeyGeneratorClass() { return getString(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS); } + public String getBootstrapKeyGeneratorType() { + return getString(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_TYPE); + } + public String getBootstrapModeSelectorRegex() { return getString(HoodieBootstrapConfig.BOOTSTRAP_MODE_SELECTOR_REGEX); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorType.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorType.java index 
a37d1b341c35f..4babda59249a3 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorType.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorType.java @@ -18,6 +18,10 @@ package org.apache.hudi.keygen.constant; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + /** * Types of {@link org.apache.hudi.keygen.KeyGenerator}. */ @@ -55,5 +59,12 @@ public enum KeyGeneratorType { /** * Key generator for deletes using global indices. */ - GLOBAL_DELETE + GLOBAL_DELETE; + + public static List<String> getNames() { + List<String> names = new ArrayList<>(KeyGeneratorType.values().length); + Arrays.stream(KeyGeneratorType.values()) + .forEach(x -> names.add(x.name())); + return names; + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/factory/HoodieAvroKeyGeneratorFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/factory/HoodieAvroKeyGeneratorFactory.java index a9e9dbbe2c630..47718990dda44 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/factory/HoodieAvroKeyGeneratorFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/factory/HoodieAvroKeyGeneratorFactory.java @@ -18,6 +18,7 @@ package org.apache.hudi.keygen.factory; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieKeyGeneratorException; import org.apache.hudi.keygen.ComplexAvroKeyGenerator; @@ -30,6 +31,9 @@ import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator; import org.apache.hudi.keygen.constant.KeyGeneratorType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.util.Locale; import java.util.Objects; @@ -41,6 +45,9 @@ * will not be overwritten by {@link KeyGeneratorType} */ public class 
HoodieAvroKeyGeneratorFactory { + + private static final Logger LOG = LoggerFactory.getLogger(HoodieAvroKeyGeneratorFactory.class); + public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException { // keyGenerator class name has higher priority KeyGenerator keyGenerator = KeyGenUtils.createKeyGeneratorByClassName(props); @@ -50,7 +57,12 @@ public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOEx private static KeyGenerator createAvroKeyGeneratorByType(TypedProperties props) throws IOException { // Use KeyGeneratorType.SIMPLE as default keyGeneratorType String keyGeneratorType = - props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key(), KeyGeneratorType.SIMPLE.name()); + props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key(), null); + + if (StringUtils.isNullOrEmpty(keyGeneratorType)) { + LOG.info("The value of {} is empty, use SIMPLE", HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key()); + keyGeneratorType = KeyGeneratorType.SIMPLE.name(); + } KeyGeneratorType keyGeneratorTypeEnum; try { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/factory/HoodieSparkKeyGeneratorFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/factory/HoodieSparkKeyGeneratorFactory.java index 902bdfb8ff34e..a31ee9b4f7ada 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/factory/HoodieSparkKeyGeneratorFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/factory/HoodieSparkKeyGeneratorFactory.java @@ -19,6 +19,7 @@ package org.apache.hudi.keygen.factory; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieKeyGeneratorException; import org.apache.hudi.keygen.BuiltinKeyGenerator; @@ -32,6 +33,9 @@ import org.apache.hudi.keygen.TimestampBasedKeyGenerator; import 
org.apache.hudi.keygen.constant.KeyGeneratorType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.util.Locale; import java.util.Objects; @@ -44,6 +48,8 @@ */ public class HoodieSparkKeyGeneratorFactory { + private static final Logger LOG = LoggerFactory.getLogger(HoodieSparkKeyGeneratorFactory.class); + public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException { // keyGenerator class name has higher priority KeyGenerator keyGenerator = KeyGenUtils.createKeyGeneratorByClassName(props); @@ -54,7 +60,12 @@ public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOEx private static BuiltinKeyGenerator createKeyGeneratorByType(TypedProperties props) throws IOException { // Use KeyGeneratorType.SIMPLE as default keyGeneratorType String keyGeneratorType = - props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key(), KeyGeneratorType.SIMPLE.name()); + props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key(), null); + + if (StringUtils.isNullOrEmpty(keyGeneratorType)) { + LOG.info("The value of {} is empty, use SIMPLE", HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key()); + keyGeneratorType = KeyGeneratorType.SIMPLE.name(); + } KeyGeneratorType keyGeneratorTypeEnum; try { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java index 4f204eabd1830..f6cc8a0c32f8a 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java @@ -57,10 +57,12 @@ import org.apache.hudi.exception.HoodieCommitException; import org.apache.hudi.exception.HoodieException; import 
org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.exception.HoodieKeyGeneratorException; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.execution.SparkBoundedInMemoryExecutor; import org.apache.hudi.io.HoodieBootstrapHandle; import org.apache.hudi.keygen.KeyGeneratorInterface; +import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory; import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.HoodieSparkTable; @@ -123,8 +125,6 @@ private void validate() { "Ensure Bootstrap Source Path is set"); ValidationUtils.checkArgument(config.getBootstrapModeSelectorClass() != null, "Ensure Bootstrap Partition Selector is set"); - ValidationUtils.checkArgument(config.getBootstrapKeyGeneratorClass() != null, - "Ensure bootstrap key generator class is set"); } @Override @@ -390,8 +390,14 @@ private JavaRDD runMetadataBootstrap(List