From 0d98db77692560c739ad661d3127c68010df0583 Mon Sep 17 00:00:00 2001
From: liujinhui1994 <965147871@qq.com>
Date: Thu, 14 Jan 2021 10:34:45 +0800
Subject: [PATCH 1/5] HUDI-1269 Make it configurable whether a Hive connection
 failure fails the Hudi ingest process

---
 .../main/java/org/apache/hudi/DataSourceUtils.java |  2 ++
 .../scala/org/apache/hudi/DataSourceOptions.scala  |  2 ++
 .../org/apache/hudi/HoodieSparkSqlWriter.scala     |  1 +
 .../java/org/apache/hudi/hive/HiveSyncConfig.java  |  4 ++++
 .../java/org/apache/hudi/hive/HiveSyncTool.java    | 14 ++++++++++++--
 5 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
index 8d3e81b0b218b..ba8255ccddbfc 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
@@ -293,6 +293,8 @@ public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String b
         DataSourceWriteOptions.DEFAULT_HIVE_USE_JDBC_OPT_VAL()));
     hiveSyncConfig.autoCreateDatabase = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_AUTO_CREATE_DATABASE_OPT_KEY(),
         DataSourceWriteOptions.DEFAULT_HIVE_AUTO_CREATE_DATABASE_OPT_KEY()));
+    hiveSyncConfig.ignoreConnectException = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY(),
+        DataSourceWriteOptions.DEFAULT_HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY()));
     hiveSyncConfig.skipROSuffix = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX(),
         DataSourceWriteOptions.DEFAULT_HIVE_SKIP_RO_SUFFIX_VAL()));
     hiveSyncConfig.supportTimestamp = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SUPPORT_TIMESTAMP(),
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
index f0974977e4a4b..e717dfb2f0b51 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
@@ -307,6 +307,7 @@ object DataSourceWriteOptions {
   val HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY = "hoodie.datasource.hive_sync.use_pre_apache_input_format"
   val HIVE_USE_JDBC_OPT_KEY = "hoodie.datasource.hive_sync.use_jdbc"
   val HIVE_AUTO_CREATE_DATABASE_OPT_KEY = "hoodie.datasource.hive_sync.auto_create_database"
+  val HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY = "hoodie.datasource.hive_sync.ignore_connect_exception"
   val HIVE_SKIP_RO_SUFFIX = "hoodie.datasource.hive_sync.skip_ro_suffix"
   val HIVE_SUPPORT_TIMESTAMP = "hoodie.datasource.hive_sync.support_timestamp"
 
@@ -325,6 +326,7 @@ object DataSourceWriteOptions {
   val DEFAULT_USE_PRE_APACHE_INPUT_FORMAT_OPT_VAL = "false"
   val DEFAULT_HIVE_USE_JDBC_OPT_VAL = "true"
   val DEFAULT_HIVE_AUTO_CREATE_DATABASE_OPT_KEY = "true"
+  val DEFAULT_HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY = "false"
   val DEFAULT_HIVE_SKIP_RO_SUFFIX_VAL = "false"
   val DEFAULT_HIVE_SUPPORT_TIMESTAMP = "false"
 
diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
index 4e9caa56e0df8..235119f99cf72 100644
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -373,6 +373,7 @@ private[hudi] object HoodieSparkSqlWriter {
     hiveSyncConfig.useJdbc = parameters(HIVE_USE_JDBC_OPT_KEY).toBoolean
     hiveSyncConfig.useFileListingFromMetadata = parameters(HoodieMetadataConfig.METADATA_ENABLE_PROP).toBoolean
     hiveSyncConfig.verifyMetadataFileListing = parameters(HoodieMetadataConfig.METADATA_VALIDATE_PROP).toBoolean
+    hiveSyncConfig.ignoreConnectException = parameters.get(HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY).exists(r => r.toBoolean)
     hiveSyncConfig.supportTimestamp = parameters.get(HIVE_SUPPORT_TIMESTAMP).exists(r => r.toBoolean)
     hiveSyncConfig.decodePartition = parameters.getOrElse(URL_ENCODE_PARTITIONING_OPT_KEY,
       DEFAULT_URL_ENCODE_PARTITIONING_OPT_VAL).toBoolean
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
index dd9d483fb10b7..e72be2b871c6a 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
@@ -76,6 +76,9 @@ public class HiveSyncConfig implements Serializable {
   @Parameter(names = {"--auto-create-database"}, description = "Auto create hive database")
   public Boolean autoCreateDatabase = true;
 
+  @Parameter(names = {"--ignore-connect-exception"}, description = "Ignore exceptions when connecting to hive")
+  public Boolean ignoreConnectException = false;
+
   @Parameter(names = {"--skip-ro-suffix"}, description = "Skip the `_ro` suffix for Read optimized table, when registering")
   public Boolean skipROSuffix = false;
 
@@ -130,6 +133,7 @@ public String toString() {
         + ", usePreApacheInputFormat=" + usePreApacheInputFormat
         + ", useJdbc=" + useJdbc
         + ", autoCreateDatabase=" + autoCreateDatabase
+        + ", ignoreConnectException=" + ignoreConnectException
         + ", skipROSuffix=" + skipROSuffix
         + ", help=" + help
         + ", supportTimestamp=" + supportTimestamp
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
index d0541e0889188..32a0ff53826be 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
@@ -57,13 +57,23 @@ public class HiveSyncTool extends AbstractSyncTool {
   public static final String SUFFIX_READ_OPTIMIZED_TABLE = "_ro";
 
   private final HiveSyncConfig cfg;
-  private final HoodieHiveClient hoodieHiveClient;
+  private HoodieHiveClient hoodieHiveClient = null;
   private final String snapshotTableName;
   private final Option<String> roTableTableName;
 
   public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
     super(configuration.getAllProperties(), fs);
-    this.hoodieHiveClient = new HoodieHiveClient(cfg, configuration, fs);
+
+    try {
+      this.hoodieHiveClient = new HoodieHiveClient(cfg, configuration, fs);
+    } catch (RuntimeException e) {
+      if (cfg.ignoreConnectException) {
+        LOG.error("Got runtime exception when hive syncing", e);
+      } else {
+        throw new HoodieHiveSyncException("Got runtime exception when hive syncing", e);
+      }
+    }
+
     this.cfg = cfg;
     // Set partitionFields to empty, when the NonPartitionedExtractor is used
     if (NonPartitionedExtractor.class.getName().equals(cfg.partitionValueExtractorClass)) {
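For context on how a writer would opt in to the behavior added above, here is a minimal sketch of a Spark write using the new option; `df`, the record-key field, table name, JDBC URL, and base path are placeholders, and the other hive_sync options shown are the standard pre-existing ones, not part of this patch:

    // Scala sketch (placeholders throughout); only the ignore_connect_exception option is new here.
    import org.apache.spark.sql.SaveMode

    df.write.format("hudi")
      .option("hoodie.table.name", "trips")                              // placeholder table
      .option("hoodie.datasource.write.recordkey.field", "uuid")         // placeholder key field
      .option("hoodie.datasource.hive_sync.enable", "true")
      .option("hoodie.datasource.hive_sync.table", "trips")
      .option("hoodie.datasource.hive_sync.jdbcurl", "jdbc:hive2://hiveserver:10000") // placeholder URL
      .option("hoodie.datasource.hive_sync.ignore_connect_exception", "true")         // new in this patch
      .mode(SaveMode.Append)
      .save("/tmp/hudi/trips")                                           // placeholder base path

With the option left at its default of "false", a failed Hive connection still fails the write, preserving the existing behavior.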
From 86faca0497789ba7d5b3e837295abb70bbf09012 Mon Sep 17 00:00:00 2001
From: liujinhui1994 <965147871@qq.com>
Date: Sat, 20 Feb 2021 16:42:49 +0800
Subject: [PATCH 2/5] Fix NullPointerException; verified in a CDH 6.3.0
 environment

---
 .../org/apache/hudi/hive/HiveSyncTool.java | 60 ++++++++++---------
 1 file changed, 33 insertions(+), 27 deletions(-)

diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
index 32a0ff53826be..1acac5e8ae9a8 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
@@ -58,8 +58,8 @@ public class HiveSyncTool extends AbstractSyncTool {
 
   private final HiveSyncConfig cfg;
   private HoodieHiveClient hoodieHiveClient = null;
-  private final String snapshotTableName;
-  private final Option<String> roTableTableName;
+  private String snapshotTableName = null;
+  private Option<String> roTableTableName = null;
 
   public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
     super(configuration.getAllProperties(), fs);
@@ -80,43 +80,49 @@ public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
       LOG.warn("Set partitionFields to empty, since the NonPartitionedExtractor is used");
       cfg.partitionFields = new ArrayList<>();
     }
-    switch (hoodieHiveClient.getTableType()) {
-      case COPY_ON_WRITE:
-        this.snapshotTableName = cfg.tableName;
-        this.roTableTableName = Option.empty();
-        break;
-      case MERGE_ON_READ:
-        this.snapshotTableName = cfg.tableName + SUFFIX_SNAPSHOT_TABLE;
-        this.roTableTableName = cfg.skipROSuffix ? Option.of(cfg.tableName) :
-            Option.of(cfg.tableName + SUFFIX_READ_OPTIMIZED_TABLE);
-        break;
-      default:
-        LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
-        throw new InvalidTableException(hoodieHiveClient.getBasePath());
-    }
-  }
-
-  @Override
-  public void syncHoodieTable() {
-    try {
+    if (hoodieHiveClient != null) {
       switch (hoodieHiveClient.getTableType()) {
         case COPY_ON_WRITE:
-          syncHoodieTable(snapshotTableName, false);
+          this.snapshotTableName = cfg.tableName;
+          this.roTableTableName = Option.empty();
           break;
         case MERGE_ON_READ:
-          // sync a RO table for MOR
-          syncHoodieTable(roTableTableName.get(), false);
-          // sync a RT table for MOR
-          syncHoodieTable(snapshotTableName, true);
+          this.snapshotTableName = cfg.tableName + SUFFIX_SNAPSHOT_TABLE;
+          this.roTableTableName = cfg.skipROSuffix ? Option.of(cfg.tableName) :
+              Option.of(cfg.tableName + SUFFIX_READ_OPTIMIZED_TABLE);
           break;
         default:
          LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
          throw new InvalidTableException(hoodieHiveClient.getBasePath());
       }
+    }
+  }
+
+  @Override
+  public void syncHoodieTable() {
+    try {
+      if (hoodieHiveClient != null) {
+        switch (hoodieHiveClient.getTableType()) {
+          case COPY_ON_WRITE:
+            syncHoodieTable(snapshotTableName, false);
+            break;
+          case MERGE_ON_READ:
+            // sync a RO table for MOR
+            syncHoodieTable(roTableTableName.get(), false);
+            // sync a RT table for MOR
+            syncHoodieTable(snapshotTableName, true);
+            break;
+          default:
+            LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
+            throw new InvalidTableException(hoodieHiveClient.getBasePath());
+        }
+      }
     } catch (RuntimeException re) {
       LOG.error("Got runtime exception when hive syncing", re);
     } finally {
-      hoodieHiveClient.close();
+      if (hoodieHiveClient != null) {
+        hoodieHiveClient.close();
+      }
     }
   }
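To make the null-guard behavior above concrete, here is a sketch of driving the sync tool programmatically against an unreachable HiveServer; the config fields are the public HiveSyncConfig fields from this series, while the database, table, URL, and path values are placeholders:

    // Scala sketch: with ignoreConnectException set, construction logs the connection
    // failure instead of throwing, and syncHoodieTable() degrades to a no-op.
    import org.apache.hadoop.fs.FileSystem
    import org.apache.hadoop.hive.conf.HiveConf
    import org.apache.hudi.hive.{HiveSyncConfig, HiveSyncTool}

    val cfg = new HiveSyncConfig()
    cfg.databaseName = "default"                          // placeholder
    cfg.tableName = "trips"                               // placeholder
    cfg.basePath = "/tmp/hudi/trips"                      // placeholder
    cfg.jdbcUrl = "jdbc:hive2://unreachable-host:10000"   // metastore that may be down
    cfg.ignoreConnectException = true

    val hiveConf = new HiveConf()
    val fs = FileSystem.get(hiveConf)
    new HiveSyncTool(cfg, hiveConf, fs).syncHoodieTable() // returns quietly; ingest can proceed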
From 672445fcd3836af337793fb928077045acddf1dd Mon Sep 17 00:00:00 2001
From: Sivabalan Narayanan
Date: Mon, 22 Feb 2021 12:56:05 -0500
Subject: [PATCH 3/5] Adding tests

---
 .../org/apache/hudi/hive/HiveSyncTool.java     |  8 +++----
 .../apache/hudi/hive/TestHiveSyncTool.java     | 22 +++++++++++++++++++
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
index ec594a006d5a7..a67b79d10a7c2 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
@@ -60,7 +60,7 @@ public class HiveSyncTool extends AbstractSyncTool {
   private final HiveSyncConfig cfg;
   private HoodieHiveClient hoodieHiveClient = null;
   private String snapshotTableName = null;
-  private Option<String> roTableTableName = null;
+  private Option<String> roTableName = null;
 
   public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
     super(configuration.getAllProperties(), fs);
@@ -85,11 +85,11 @@ public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
       switch (hoodieHiveClient.getTableType()) {
         case COPY_ON_WRITE:
           this.snapshotTableName = cfg.tableName;
-          this.roTableTableName = Option.empty();
+          this.roTableName = Option.empty();
           break;
         case MERGE_ON_READ:
           this.snapshotTableName = cfg.tableName + SUFFIX_SNAPSHOT_TABLE;
-          this.roTableTableName = cfg.skipROSuffix ? Option.of(cfg.tableName) :
+          this.roTableName = cfg.skipROSuffix ? Option.of(cfg.tableName) :
              Option.of(cfg.tableName + SUFFIX_READ_OPTIMIZED_TABLE);
           break;
         default:
@@ -109,7 +109,7 @@ public void syncHoodieTable() {
             break;
           case MERGE_ON_READ:
             // sync a RO table for MOR
-            syncHoodieTable(roTableTableName.get(), false);
+            syncHoodieTable(roTableName.get(), false);
             // sync a RT table for MOR
             syncHoodieTable(snapshotTableName, true);
             break;
diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
index 8a1ea4f893927..f3faf0ef848fc 100644
--- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
@@ -40,6 +40,7 @@
 import org.junit.jupiter.params.provider.MethodSource;
 
 import java.io.IOException;
+import java.net.URISyntaxException;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
@@ -613,4 +614,25 @@ public void testReadSchemaForMOR(boolean useJdbc) throws Exception {
         "The last commit that was sycned should be 103");
   }
 
+  @Test
+  public void testConnectExceptionIgnoreConfigSet() throws IOException, URISyntaxException {
+    HiveTestUtil.hiveSyncConfig.useJdbc = true;
+    String instantTime = "100";
+    HiveTestUtil.createCOWTable(instantTime, 5, false);
+    HoodieHiveClient hiveClient =
+        new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
+    assertFalse(hiveClient.doesTableExist(HiveTestUtil.hiveSyncConfig.tableName),
+        "Table " + HiveTestUtil.hiveSyncConfig.tableName + " should not exist initially");
+    // Lets do the sync
+
+    HiveTestUtil.hiveSyncConfig.useJdbc = true;
+    HiveTestUtil.hiveSyncConfig.ignoreConnectException = true;
+    HiveTestUtil.hiveSyncConfig.jdbcUrl = HiveTestUtil.hiveSyncConfig.jdbcUrl.replace("9999", "9031");
+    HiveSyncTool tool = new HiveSyncTool(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
+    tool.syncHoodieTable();
+
+    assertFalse(hiveClient.doesTableExist(HiveTestUtil.hiveSyncConfig.tableName),
+        "Table " + HiveTestUtil.hiveSyncConfig.tableName + " should still not exist, since the sync failure was ignored");
+  }
+
 }
From 8aa5ce7b6ed9a4d95e08b5cef6f17ba95f3ebfcc Mon Sep 17 00:00:00 2001
From: Sivabalan Narayanan
Date: Tue, 23 Feb 2021 09:13:49 -0500
Subject: [PATCH 4/5] Fixing tests

---
 .../test/java/org/apache/hudi/hive/TestHiveSyncTool.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
index f3faf0ef848fc..2d90d8716fd05 100644
--- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
@@ -625,10 +625,10 @@ public void testConnectExceptionIgnoreConfigSet() throws IOException, URISyntaxE
         "Table " + HiveTestUtil.hiveSyncConfig.tableName + " should not exist initially");
     // Lets do the sync
 
-    HiveTestUtil.hiveSyncConfig.useJdbc = true;
-    HiveTestUtil.hiveSyncConfig.ignoreConnectException = true;
-    HiveTestUtil.hiveSyncConfig.jdbcUrl = HiveTestUtil.hiveSyncConfig.jdbcUrl.replace("9999", "9031");
-    HiveSyncTool tool = new HiveSyncTool(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
+    HiveSyncConfig syncToolConfig = HiveSyncConfig.copy(HiveTestUtil.hiveSyncConfig);
+    syncToolConfig.ignoreConnectException = true;
+    syncToolConfig.jdbcUrl = HiveTestUtil.hiveSyncConfig.jdbcUrl.replace("9999", "9031");
+    HiveSyncTool tool = new HiveSyncTool(syncToolConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
     tool.syncHoodieTable();
 
     assertFalse(hiveClient.doesTableExist(HiveTestUtil.hiveSyncConfig.tableName),
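The copy above matters because HiveTestUtil.hiveSyncConfig is shared static state: rewriting its jdbcUrl in place, as the previous revision did, would leak the unreachable port into every later test. A sketch of the isolation pattern (assuming HiveSyncConfig.copy performs a field-by-field clone, as its use here implies):

    // Scala sketch of the test-isolation pattern used above.
    val syncToolConfig = HiveSyncConfig.copy(HiveTestUtil.hiveSyncConfig)  // private copy
    syncToolConfig.ignoreConnectException = true
    syncToolConfig.jdbcUrl = syncToolConfig.jdbcUrl.replace("9999", "9031") // port nobody listens on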
From 7baf5de95e9705141f1a0e91eb31fbad877a7b77 Mon Sep 17 00:00:00 2001
From: Sivabalan Narayanan
Date: Thu, 25 Feb 2021 01:37:57 -0500
Subject: [PATCH 5/5] Addressing feedback

---
 .../src/main/java/org/apache/hudi/DataSourceUtils.java     | 4 ++--
 .../src/main/scala/org/apache/hudi/DataSourceOptions.scala | 4 ++--
 .../main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala  | 2 +-
 .../src/main/java/org/apache/hudi/hive/HiveSyncConfig.java | 6 +++---
 .../src/main/java/org/apache/hudi/hive/HiveSyncTool.java   | 4 ++--
 .../test/java/org/apache/hudi/hive/TestHiveSyncTool.java   | 2 +-
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
index ba8255ccddbfc..2a3666f9beb1a 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
@@ -293,8 +293,8 @@ public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String b
         DataSourceWriteOptions.DEFAULT_HIVE_USE_JDBC_OPT_VAL()));
     hiveSyncConfig.autoCreateDatabase = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_AUTO_CREATE_DATABASE_OPT_KEY(),
         DataSourceWriteOptions.DEFAULT_HIVE_AUTO_CREATE_DATABASE_OPT_KEY()));
-    hiveSyncConfig.ignoreConnectException = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY(),
-        DataSourceWriteOptions.DEFAULT_HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY()));
+    hiveSyncConfig.ignoreExceptions = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS_OPT_KEY(),
+        DataSourceWriteOptions.DEFAULT_HIVE_IGNORE_EXCEPTIONS_OPT_KEY()));
     hiveSyncConfig.skipROSuffix = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX(),
         DataSourceWriteOptions.DEFAULT_HIVE_SKIP_RO_SUFFIX_VAL()));
     hiveSyncConfig.supportTimestamp = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SUPPORT_TIMESTAMP(),
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
index 1d1e32422410f..4b8e97cf53df2 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
@@ -347,7 +347,7 @@ object DataSourceWriteOptions {
   val HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY = "hoodie.datasource.hive_sync.use_pre_apache_input_format"
   val HIVE_USE_JDBC_OPT_KEY = "hoodie.datasource.hive_sync.use_jdbc"
   val HIVE_AUTO_CREATE_DATABASE_OPT_KEY = "hoodie.datasource.hive_sync.auto_create_database"
-  val HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY = "hoodie.datasource.hive_sync.ignore_connect_exception"
+  val HIVE_IGNORE_EXCEPTIONS_OPT_KEY = "hoodie.datasource.hive_sync.ignore_exceptions"
   val HIVE_SKIP_RO_SUFFIX = "hoodie.datasource.hive_sync.skip_ro_suffix"
   val HIVE_SUPPORT_TIMESTAMP = "hoodie.datasource.hive_sync.support_timestamp"
 
@@ -366,7 +366,7 @@ object DataSourceWriteOptions {
   val DEFAULT_USE_PRE_APACHE_INPUT_FORMAT_OPT_VAL = "false"
   val DEFAULT_HIVE_USE_JDBC_OPT_VAL = "true"
   val DEFAULT_HIVE_AUTO_CREATE_DATABASE_OPT_KEY = "true"
-  val DEFAULT_HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY = "false"
+  val DEFAULT_HIVE_IGNORE_EXCEPTIONS_OPT_KEY = "false"
   val DEFAULT_HIVE_SKIP_RO_SUFFIX_VAL = "false"
   val DEFAULT_HIVE_SUPPORT_TIMESTAMP = "false"
 
diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
index 0f2e5ef6ced9c..3dfd07c37f973 100644
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -374,7 +374,7 @@ private[hudi] object HoodieSparkSqlWriter {
     hiveSyncConfig.useJdbc = parameters(HIVE_USE_JDBC_OPT_KEY).toBoolean
     hiveSyncConfig.useFileListingFromMetadata = parameters(HoodieMetadataConfig.METADATA_ENABLE_PROP).toBoolean
     hiveSyncConfig.verifyMetadataFileListing = parameters(HoodieMetadataConfig.METADATA_VALIDATE_PROP).toBoolean
-    hiveSyncConfig.ignoreConnectException = parameters.get(HIVE_IGNORE_CONNECT_EXCEPTION_OPT_KEY).exists(r => r.toBoolean)
+    hiveSyncConfig.ignoreExceptions = parameters.get(HIVE_IGNORE_EXCEPTIONS_OPT_KEY).exists(r => r.toBoolean)
     hiveSyncConfig.supportTimestamp = parameters.get(HIVE_SUPPORT_TIMESTAMP).exists(r => r.toBoolean)
     hiveSyncConfig.autoCreateDatabase = parameters.get(HIVE_AUTO_CREATE_DATABASE_OPT_KEY).exists(r => r.toBoolean)
     hiveSyncConfig.decodePartition = parameters.getOrElse(URL_ENCODE_PARTITIONING_OPT_KEY,
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
index e72be2b871c6a..0063d15affd1d 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
@@ -76,8 +76,8 @@ public class HiveSyncConfig implements Serializable {
   @Parameter(names = {"--auto-create-database"}, description = "Auto create hive database")
   public Boolean autoCreateDatabase = true;
 
-  @Parameter(names = {"--ignore-connect-exception"}, description = "Ignore exceptions when connecting to hive")
-  public Boolean ignoreConnectException = false;
+  @Parameter(names = {"--ignore-exceptions"}, description = "Ignore hive exceptions")
+  public Boolean ignoreExceptions = false;
 
   @Parameter(names = {"--skip-ro-suffix"}, description = "Skip the `_ro` suffix for Read optimized table, when registering")
   public Boolean skipROSuffix = false;
@@ -133,7 +133,7 @@ public String toString() {
         + ", usePreApacheInputFormat=" + usePreApacheInputFormat
         + ", useJdbc=" + useJdbc
         + ", autoCreateDatabase=" + autoCreateDatabase
-        + ", ignoreConnectException=" + ignoreConnectException
+        + ", ignoreExceptions=" + ignoreExceptions
         + ", skipROSuffix=" + skipROSuffix
         + ", help=" + help
         + ", supportTimestamp=" + supportTimestamp
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
index a67b79d10a7c2..754548c467167 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
@@ -68,8 +68,8 @@ public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
 
     try {
       this.hoodieHiveClient = new HoodieHiveClient(cfg, configuration, fs);
     } catch (RuntimeException e) {
-      if (cfg.ignoreConnectException) {
-        LOG.error("Got runtime exception when hive syncing", e);
+      if (cfg.ignoreExceptions) {
+        LOG.error("Got runtime exception when hive syncing, but continuing as ignoreExceptions config is set", e);
       } else {
         throw new HoodieHiveSyncException("Got runtime exception when hive syncing", e);
       }
diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
index 2d90d8716fd05..c38a6ed4c15f6 100644
--- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
@@ -626,7 +626,7 @@ public void testConnectExceptionIgnoreConfigSet() throws IOException, URISyntaxE
     // Lets do the sync
 
     HiveSyncConfig syncToolConfig = HiveSyncConfig.copy(HiveTestUtil.hiveSyncConfig);
-    syncToolConfig.ignoreConnectException = true;
+    syncToolConfig.ignoreExceptions = true;
     syncToolConfig.jdbcUrl = HiveTestUtil.hiveSyncConfig.jdbcUrl.replace("9999", "9031");
     HiveSyncTool tool = new HiveSyncTool(syncToolConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
     tool.syncHoodieTable();
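After the rename in this final patch, end-to-end usage reads as follows; a minimal sketch mirroring the earlier example, with `df`, the table name, and base path still placeholders:

    // Scala sketch: final option name after review feedback.
    df.write.format("hudi")
      .option("hoodie.datasource.hive_sync.enable", "true")
      .option("hoodie.datasource.hive_sync.ignore_exceptions", "true") // was ignore_connect_exception
      .mode(org.apache.spark.sql.SaveMode.Append)
      .save("/tmp/hudi/trips") // placeholder base path

The broader name fits the behavior: the guard catches any RuntimeException raised while constructing the Hive client, not only connection failures.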