From e8968ba28b6598ef9d8e29db09a26dd17c69a788 Mon Sep 17 00:00:00 2001 From: Raymond Xu <2701446+xushiyan@users.noreply.github.com> Date: Sat, 9 Jul 2022 16:29:34 -0500 Subject: [PATCH 1/3] [HUDI-4323] Make database table names optional in sync tool --- .../org/apache/hudi/DataSourceOptions.scala | 2 -- .../hudi/sync/common/HoodieSyncConfig.java | 23 ++++++++----------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala index 654d1aeada2e2..feeb57212603d 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala @@ -414,8 +414,6 @@ object DataSourceWriteOptions { @Deprecated val HIVE_DATABASE: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_DATABASE_NAME @Deprecated - val hiveTableOptKeyInferFunc: JavaFunction[HoodieConfig, Option[String]] = HoodieSyncConfig.TABLE_NAME_INFERENCE_FUNCTION - @Deprecated val HIVE_TABLE: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_TABLE_NAME @Deprecated val HIVE_BASE_FILE_FORMAT: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java index ba763ddc14bb7..fc491b23dc031 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java @@ -23,7 +23,6 @@ import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.fs.FSUtils; -import 
org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; @@ -37,6 +36,10 @@ import java.util.Properties; import java.util.function.Function; +import static org.apache.hudi.common.table.HoodieTableConfig.DATABASE_NAME; +import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_TABLE_NAME_KEY; +import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_WRITE_TABLE_NAME_KEY; + /** * Configs needed to sync data into external meta stores, catalogs, etc. */ @@ -56,22 +59,14 @@ public class HoodieSyncConfig extends HoodieConfig { public static final ConfigProperty<String> META_SYNC_DATABASE_NAME = ConfigProperty .key("hoodie.datasource.hive_sync.database") .defaultValue("default") + .withInferFunction(cfg -> Option.ofNullable(cfg.getString(DATABASE_NAME))) .withDocumentation("The name of the destination database that we should sync the hudi table to."); - // If the table name for the metastore destination is not provided, pick it up from write or table configs. 
- public static final Function<HoodieConfig, Option<String>> TABLE_NAME_INFERENCE_FUNCTION = cfg -> { - if (cfg.contains(HoodieTableConfig.HOODIE_WRITE_TABLE_NAME_KEY)) { - return Option.of(cfg.getString(HoodieTableConfig.HOODIE_WRITE_TABLE_NAME_KEY)); - } else if (cfg.contains(HoodieTableConfig.HOODIE_TABLE_NAME_KEY)) { - return Option.of(cfg.getString(HoodieTableConfig.HOODIE_TABLE_NAME_KEY)); - } else { - return Option.empty(); - } - }; public static final ConfigProperty<String> META_SYNC_TABLE_NAME = ConfigProperty .key("hoodie.datasource.hive_sync.table") .defaultValue("unknown") - .withInferFunction(TABLE_NAME_INFERENCE_FUNCTION) + .withInferFunction(cfg -> Option.ofNullable(cfg.getString(HOODIE_WRITE_TABLE_NAME_KEY)) + .or(() -> Option.ofNullable(cfg.getString(HOODIE_TABLE_NAME_KEY)))) .withDocumentation("The name of the destination table that we should sync the hudi table to."); public static final ConfigProperty<String> META_SYNC_BASE_FILE_FORMAT = ConfigProperty @@ -173,9 +168,9 @@ public String toString() { } public static class HoodieSyncConfigParams { - @Parameter(names = {"--database"}, description = "name of the target database in meta store", required = true) + @Parameter(names = {"--database"}, description = "name of the target database in meta store") public String databaseName; - @Parameter(names = {"--table"}, description = "name of the target table in meta store", required = true) + @Parameter(names = {"--table"}, description = "name of the target table in meta store") public String tableName; @Parameter(names = {"--base-path"}, description = "Base path of the hoodie table to sync", required = true) public String basePath; From 2ae16fdada861169f66e366e230f4faa911b2431 Mon Sep 17 00:00:00 2001 From: Raymond Xu <2701446+xushiyan@users.noreply.github.com> Date: Sun, 10 Jul 2022 10:47:52 -0500 Subject: [PATCH 2/3] fix default setting --- .../main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java | 1 + 1 file changed, 1 insertion(+) diff --git 
a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java index fc491b23dc031..429bb93aa3284 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java @@ -143,6 +143,7 @@ public HoodieSyncConfig(Properties props) { public HoodieSyncConfig(Properties props, Configuration hadoopConf) { super(props); + setDefaults(getClass().getName()); this.hadoopConf = hadoopConf; } From df53b691093f7823d1c8397988f875908eeb4d30 Mon Sep 17 00:00:00 2001 From: Raymond Xu <2701446+xushiyan@users.noreply.github.com> Date: Sun, 10 Jul 2022 12:28:14 -0500 Subject: [PATCH 3/3] add UT --- .../sync/common/TestHoodieSyncConfig.java | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java diff --git a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java new file mode 100644 index 0000000000000..1f6c05cd12423 --- /dev/null +++ b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.sync.common; + +import org.apache.hudi.common.table.HoodieTableConfig; + +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.Test; + +import java.util.Properties; + +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME; +import static org.junit.jupiter.api.Assertions.assertEquals; + +class TestHoodieSyncConfig { + + @Test + void testInferDatabaseAndTableNames() { + Properties props1 = new Properties(); + props1.setProperty(HoodieTableConfig.DATABASE_NAME.key(), "db1"); + props1.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_KEY, "tbl1"); + HoodieSyncConfig config1 = new HoodieSyncConfig(props1, new Configuration()); + assertEquals("db1", config1.getString(META_SYNC_DATABASE_NAME)); + assertEquals("tbl1", config1.getString(META_SYNC_TABLE_NAME)); + + Properties props2 = new Properties(); + props2.setProperty(HoodieTableConfig.DATABASE_NAME.key(), "db2"); + props2.setProperty(HoodieTableConfig.HOODIE_WRITE_TABLE_NAME_KEY, "tbl2"); + HoodieSyncConfig config2 = new HoodieSyncConfig(props2, new Configuration()); + assertEquals("db2", config2.getString(META_SYNC_DATABASE_NAME)); + assertEquals("tbl2", config2.getString(META_SYNC_TABLE_NAME)); + + HoodieSyncConfig config3 = new HoodieSyncConfig(new Properties(), new Configuration()); + assertEquals("default", config3.getString(META_SYNC_DATABASE_NAME)); + assertEquals("unknown", config3.getString(META_SYNC_TABLE_NAME)); + } +}