From 5456ff21918353639272de85422285da4aec02e4 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 7 Sep 2021 16:38:21 +0800 Subject: [PATCH 01/37] HUDI-2370 --- .../apache/hudi/crypot/kms/InMemoryKMS.java | 104 ++++++++++++++++++ .../hudi/crypot/TestParquetEncryption.java | 102 +++++++++++++++++ pom.xml | 2 +- 3 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 hudi-common/src/main/java/org/apache/hudi/crypot/kms/InMemoryKMS.java create mode 100644 hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/crypot/TestParquetEncryption.java diff --git a/hudi-common/src/main/java/org/apache/hudi/crypot/kms/InMemoryKMS.java b/hudi-common/src/main/java/org/apache/hudi/crypot/kms/InMemoryKMS.java new file mode 100644 index 0000000000000..44421e1cc81c8 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/crypot/kms/InMemoryKMS.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.crypot.kms; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.parquet.crypto.KeyAccessDeniedException; +import org.apache.parquet.crypto.ParquetCryptoRuntimeException; +import org.apache.parquet.crypto.keytools.KeyToolkit; +import org.apache.parquet.crypto.keytools.KmsClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.charset.StandardCharsets; +import java.util.Base64; +import java.util.HashMap; +import java.util.Map; + +/** + * This is a mock class, built for testing only. Don't use it as an example of KmsClient implementation. + * (org.apache.parquet.crypto.keytools.samples.VaultClient is the sample implementation). + */ +public class InMemoryKMS implements KmsClient { + private static final Logger LOG = LoggerFactory.getLogger(InMemoryKMS.class); + + private static final ConfigProperty KEY_LIST_PROPERTY_NAME = ConfigProperty + .key("hoodie.parquet.encryption.key.list") + .noDefaultValue() + .withDocumentation("hudi parquet encryption key list, example: keyName_1:value_1, keyName_2:value_2"); + + private static Map masterKeyMap; + + @Override + public synchronized void initialize(Configuration configuration, String kmsInstanceID, String kmsInstanceURL, String accessToken) { + // Parse master keys + String[] masterKeys = configuration.getTrimmedStrings(KEY_LIST_PROPERTY_NAME.key()); + if (null == masterKeys || masterKeys.length == 0) { + throw new ParquetCryptoRuntimeException("No encryption key list"); + } + masterKeyMap = parseKeyList(masterKeys); + } + + private static Map parseKeyList(String[] masterKeys) { + + Map keyMap = new HashMap<>(); + + int nKeys = masterKeys.length; + for (int i = 0; i < nKeys; i++) { + String[] parts = masterKeys[i].split(":"); + String keyName = parts[0].trim(); + if (parts.length != 2) { + throw new IllegalArgumentException("Key '" + keyName + "' is not formatted correctly"); + } + String key = parts[1].trim(); + try { + byte[] keyBytes = Base64.getDecoder().decode(key); + keyMap.put(keyName, keyBytes); + } catch (IllegalArgumentException e) { + LOG.warn("Could not decode key '" + keyName + "'!"); + throw e; + } + } + return keyMap; + } + + @Override + public synchronized String wrapKey(byte[] keyBytes, String masterKeyIdentifier) + throws KeyAccessDeniedException, UnsupportedOperationException { + + byte[] masterKey = masterKeyMap.get(masterKeyIdentifier); + if (null == masterKey) { + throw new ParquetCryptoRuntimeException("Key not found: " + masterKeyIdentifier); + } + byte[] add = masterKeyIdentifier.getBytes(StandardCharsets.UTF_8); + return KeyToolkit.encryptKeyLocally(keyBytes, masterKey, add); + } + + @Override + public synchronized byte[] unwrapKey(String wrappedKey, String masterKeyIdentifier) + throws KeyAccessDeniedException, UnsupportedOperationException { + byte[] masterKey = masterKeyMap.get(masterKeyIdentifier); + if (null == masterKey) { + throw new ParquetCryptoRuntimeException("Key not found: " + masterKeyIdentifier); + } + byte[] add = masterKeyIdentifier.getBytes(StandardCharsets.UTF_8); + return KeyToolkit.decryptKeyLocally(wrappedKey, masterKey, add); + } +} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/crypot/TestParquetEncryption.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/crypot/TestParquetEncryption.java new file mode 100644 index 0000000000000..f36d2409fa9a5 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/crypot/TestParquetEncryption.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.crypot; + +import org.apache.hudi.crypot.kms.InMemoryKMS; +import org.apache.hudi.testutils.HoodieClientTestBase; +import org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import static org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings; + +public class TestParquetEncryption extends HoodieClientTestBase { + + private SparkSession spark; + + private HashMap commonOpts = new HashMap(); + + @BeforeEach + public void setUp() throws Exception { + commonOpts.put("hoodie.insert.shuffle.parallelism", "4"); + commonOpts.put("hoodie.upsert.shuffle.parallelism", "4"); + commonOpts.put("hoodie.bulkinsert.shuffle.parallelism", "4"); + commonOpts.put("hoodie.delete.shuffle.parallelism", "4"); + commonOpts.put("hoodie.datasource.write.recordkey.field", "_row_key"); + commonOpts.put("hoodie.datasource.write.partitionpath.field", "partition"); + commonOpts.put("hoodie.datasource.write.precombine.field", "timestamp"); + commonOpts.put("hoodie.table.name", "hoodie_test"); + + initPath(); + initSparkContexts(); + spark = sqlContext.sparkSession(); + initTestDataGenerator(); + initFileSystem(); + } + + @AfterEach + public void tearDown() throws IOException { + cleanupSparkContexts(); + cleanupTestDataGenerator(); + cleanupFileSystem(); + } + + @Test + public void testEncryption() { + + JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); + jsc.hadoopConfiguration().set("parquet.crypto.factory.class", PropertiesDrivenCryptoFactory.class.getName()); + jsc.hadoopConfiguration().set("parquet.encryption.kms.client.class", InMemoryKMS.class.getName()); + jsc.hadoopConfiguration().set("parquet.encryption.footer.key", "k1"); + jsc.hadoopConfiguration().set("parquet.encryption.column.keys", "k2:rider,_row_key"); + jsc.hadoopConfiguration().set("hoodie.parquet.encryption.key.list", "k1:AAECAwQFBgcICQoLDA0ODw==, k2:AAECAAECAAECAAECAAECAA=="); + + List records1 = recordsToStrings(dataGen.generateInserts("000", 100)); + Dataset inputDF1 = spark.read().json(jsc.parallelize(records1, 2)); + + inputDF1.write().format("org.apache.hudi") + .options(commonOpts) + .mode(SaveMode.Overwrite) + .save(basePath); + + + //1. no footer key, no column key + jsc.hadoopConfiguration().clear(); + Assertions.assertThrows(Exception.class, () -> spark.read().format("org.apache.hudi").load(basePath).count()); + Assertions.assertThrows(Exception.class, () -> spark.read().format("org.apache.hudi").load(basePath).select("rider").show(1)); + + //2 has footer key, has column key + jsc.hadoopConfiguration().set("parquet.crypto.factory.class", PropertiesDrivenCryptoFactory.class.getName()); + jsc.hadoopConfiguration().set("parquet.encryption.kms.client.class", InMemoryKMS.class.getName()); + jsc.hadoopConfiguration().set("hoodie.parquet.encryption.key.list", "k1:AAECAwQFBgcICQoLDA0ODw==, k2:AAECAAECAAECAAECAAECAA=="); + Assertions.assertEquals(100, spark.read().format("org.apache.hudi").load(basePath).count()); + Assertions.assertDoesNotThrow(() -> spark.read().format("org.apache.hudi").load(basePath).select("rider").show(1)); + } +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 0fd756ee4a54e..6462ae0da29c6 100644 --- a/pom.xml +++ b/pom.xml @@ -91,7 +91,7 @@ 2.0.0 5.3.4 2.17 - 1.10.1 + 1.12.0 5.7.0-M1 5.7.0-M1 1.7.0-M1 From 926c941ed2dada6a4cf0670a8d221452b312625a Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 8 Sep 2021 16:54:46 +0800 Subject: [PATCH 02/37] exclude other parquet --- hudi-cli/pom.xml | 10 ++++++++++ hudi-client/hudi-spark-client/pom.xml | 10 ++++++++++ hudi-flink/pom.xml | 8 +++++++- hudi-sync/hudi-hive-sync/pom.xml | 10 ++++++++++ hudi-utilities/pom.xml | 8 ++++++++ packaging/hudi-flink-bundle/pom.xml | 2 +- 6 files changed, 46 insertions(+), 2 deletions(-) diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index a81e79e3cd2ef..db2a3d0e0583d 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -224,6 +224,16 @@ org.apache.spark spark-sql_${scala.binary.version} + + + parquet-column + org.apache.parquet + + + parquet-hadoop + org.apache.parquet + + org.apache.spark diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index 21cfe6bff24f7..f57b4a4b788d5 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -52,6 +52,16 @@ org.apache.spark spark-sql_${scala.binary.version} + + + parquet-column + org.apache.parquet + + + parquet-hadoop + org.apache.parquet + + org.apache.spark diff --git a/hudi-flink/pom.xml b/hudi-flink/pom.xml index 8885007748f5f..b61d5ed667621 100644 --- a/hudi-flink/pom.xml +++ b/hudi-flink/pom.xml @@ -30,7 +30,7 @@ ${project.parent.basedir} - 1.11.1 + 1.12.0 @@ -155,6 +155,12 @@ flink-parquet_${scala.binary.version} ${flink.version} provided + + + parquet-hadoop + org.apache.parquet + + org.apache.flink diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml index 8f4fedaed5e0f..6349ab7ff702f 100644 --- a/hudi-sync/hudi-hive-sync/pom.xml +++ b/hudi-sync/hudi-hive-sync/pom.xml @@ -149,6 +149,16 @@ org.apache.spark spark-sql_${scala.binary.version} test + + + parquet-column + org.apache.parquet + + + parquet-hadoop + org.apache.parquet + + diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index 089b7801baf82..e72c82ef9bec2 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -207,6 +207,14 @@ javax.servlet * + + parquet-hadoop + org.apache.parquet + + + parquet-column + org.apache.parquet + diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index fea8849278d4d..1cac0234253a3 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -35,7 +35,7 @@ org.apache.hudi. 3.1.0 - 1.11.1 + 1.12.0 2.3.1 0.9.3 From 669716935d27d9a7263b6e32882e246b36b100ca Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Thu, 16 Sep 2021 13:53:49 +0800 Subject: [PATCH 03/37] update spark.sql.parquet.enableVectorizedReader=true --- .../scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index f1bc847514df4..c7afd0f8c27dc 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -112,7 +112,7 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, // config to ensure the push down filter for parquet will be applied. sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true") - sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") + sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true") val pushDownFilter = { val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD) val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp) From dd1bc3da2bf90e6a8ff1c574be035b4b28ee536e Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 13 Oct 2021 14:36:27 +0800 Subject: [PATCH 04/37] TEST ci --- .../scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index c7afd0f8c27dc..f1bc847514df4 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -112,7 +112,7 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, // config to ensure the push down filter for parquet will be applied. sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true") - sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true") + sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") val pushDownFilter = { val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD) val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp) From b9cbda1e700dc3d06b03616558393598c5964450 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Mon, 18 Oct 2021 16:30:44 +0800 Subject: [PATCH 05/37] test ci --- .../scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index f1bc847514df4..c7afd0f8c27dc 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -112,7 +112,7 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, // config to ensure the push down filter for parquet will be applied. sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true") - sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") + sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true") val pushDownFilter = { val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD) val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp) From 30f5c98fc899f136a071d3bb009dd8d60e059871 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 26 Oct 2021 10:27:12 +0800 Subject: [PATCH 06/37] HUDI-2370 --- .../scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index c7afd0f8c27dc..c19b8963889e8 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -113,6 +113,7 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true") + sqlContext.sparkSession.sessionState.conf.setConfString("parquet.filter.columnindex.enabled", "false") val pushDownFilter = { val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD) val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp) From 5e0672ff41961296d581b06ffb26723a8456a423 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 26 Oct 2021 10:30:27 +0800 Subject: [PATCH 07/37] HUDI-2370 --- .../scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index a25bdcc55b2b1..1d819d78efead 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -113,7 +113,7 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, // config to ensure the push down filter for parquet will be applied. sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true") - sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true") + sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") sqlContext.sparkSession.sessionState.conf.setConfString("parquet.filter.columnindex.enabled", "false") val pushDownFilter = { val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD) From 9207f2cf2f6216f84493834e35b02e1083f88923 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 26 Oct 2021 15:01:00 +0800 Subject: [PATCH 08/37] HUDI-2370 --- hudi-cli/pom.xml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index 09815203b16b9..26c0e685a931d 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -224,16 +224,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - - parquet-column - org.apache.parquet - - - parquet-hadoop - org.apache.parquet - - org.apache.spark From 115d60609dc4867085f646bd1810a60adae696ee Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 27 Oct 2021 11:08:56 +0800 Subject: [PATCH 09/37] HUDI-2370 --- hudi-cli/pom.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index 26c0e685a931d..e0b9d3274a339 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -224,6 +224,16 @@ org.apache.spark spark-sql_${scala.binary.version} + + + parquet-hadoop + org.apache.parquet + + + parquet-column + org.apache.parquet + + org.apache.spark From cb2d4c6bb353d978ff7e239536a42e8940605168 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 27 Oct 2021 11:17:26 +0800 Subject: [PATCH 10/37] HUDI-2370 --- hudi-cli/pom.xml | 10 ---------- hudi-integ-test/pom.xml | 8 ++++++++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index e0b9d3274a339..26c0e685a931d 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -224,16 +224,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - - parquet-hadoop - org.apache.parquet - - - parquet-column - org.apache.parquet - - org.apache.spark diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index c697d6cc0102a..524eb16a35caa 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -85,6 +85,14 @@ org.apache.curator * + + parquet-hadoop + org.apache.parquet + + + parquet-column + org.apache.parquet + From f6d92049414157aa203ec0791f17a7694bfda531 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 27 Oct 2021 14:36:24 +0800 Subject: [PATCH 11/37] HUDI-2370 --- packaging/hudi-flink-bundle/pom.xml | 6 ++++++ packaging/hudi-integ-test-bundle/pom.xml | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index eddcc50b77c2e..4a519524cc0d6 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -333,6 +333,12 @@ flink-parquet_${scala.binary.version} ${flink.version} compile + + + parquet-hadoop + org.apache.parquet + + org.apache.flink diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml index 7518e7b44ef0f..4933b6879a66e 100644 --- a/packaging/hudi-integ-test-bundle/pom.xml +++ b/packaging/hudi-integ-test-bundle/pom.xml @@ -549,6 +549,16 @@ org.apache.spark spark-sql_${scala.binary.version} + + + parquet-column + org.apache.parquet + + + parquet-hadoop + org.apache.parquet + + From c665b68dde8d2c71b18ea1578e390cc8894e80e6 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 27 Oct 2021 15:29:50 +0800 Subject: [PATCH 12/37] HUDI-2370 --- hudi-integ-test/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index 524eb16a35caa..8015843581a38 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -96,6 +96,12 @@ + + + org.apache.parquet + parquet-avro + + org.apache.hudi From 1e344f7051d252c4953118f6640c05688278de35 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Fri, 29 Oct 2021 14:28:11 +0800 Subject: [PATCH 13/37] HUDI-2370 --- hudi-cli/pom.xml | 10 ++++++++++ hudi-examples/pom.xml | 10 ++++++++++ hudi-spark-datasource/hudi-spark/pom.xml | 20 ++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index 26c0e685a931d..0c5cdc061af77 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -224,6 +224,16 @@ org.apache.spark spark-sql_${scala.binary.version} + + + parquet-column + org.apache.parquet + + + parquet-hadoop + org.apache.parquet + + org.apache.spark diff --git a/hudi-examples/pom.xml b/hudi-examples/pom.xml index 8e58acdda0af9..6f5c5a21c61ad 100644 --- a/hudi-examples/pom.xml +++ b/hudi-examples/pom.xml @@ -177,6 +177,16 @@ org.apache.spark spark-sql_${scala.binary.version} + + + parquet-column + org.apache.parquet + + + parquet-hadoop + org.apache.parquet + + org.apache.spark diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml index e7f5211567145..a05ac31191370 100644 --- a/hudi-spark-datasource/hudi-spark/pom.xml +++ b/hudi-spark-datasource/hudi-spark/pom.xml @@ -290,6 +290,16 @@ org.apache.spark spark-sql_${scala.binary.version} + + + parquet-column + org.apache.parquet + + + parquet-hadoop + org.apache.parquet + + @@ -302,6 +312,16 @@ spark-sql_${scala.binary.version} tests test + + + parquet-column + org.apache.parquet + + + parquet-hadoop + org.apache.parquet + + org.apache.spark From 3352417fb39be7503b94033cc6e312195477d83a Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Mon, 1 Nov 2021 17:50:40 +0800 Subject: [PATCH 14/37] HUDI-2370 --- hudi-client/hudi-spark-client/pom.xml | 5 +++++ pom.xml | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index f57b4a4b788d5..e5a6fb982409e 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -75,6 +75,11 @@ parquet-avro + + org.apache.parquet + parquet-column + + org.apache.hudi diff --git a/pom.xml b/pom.xml index aa99b53f943b2..4f04a845ebf88 100644 --- a/pom.xml +++ b/pom.xml @@ -531,7 +531,12 @@ org.apache.parquet parquet-avro ${parquet.version} - provided + + + + org.apache.parquet + parquet-column + ${parquet.version} From 03c530876164a57d61df0084767b10fa078b44ea Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 2 Nov 2021 14:10:19 +0800 Subject: [PATCH 15/37] HUDI-2370 --- hudi-cli/pom.xml | 5 +++++ hudi-client/hudi-client-common/pom.xml | 5 +++++ hudi-client/hudi-java-client/pom.xml | 5 +++++ hudi-common/pom.xml | 5 +++++ hudi-examples/pom.xml | 6 ++++++ hudi-hadoop-mr/pom.xml | 5 +++++ packaging/hudi-spark-bundle/pom.xml | 7 ++++++- packaging/hudi-utilities-bundle/pom.xml | 7 +++++++ pom.xml | 2 ++ 9 files changed, 46 insertions(+), 1 deletion(-) diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index 0c5cdc061af77..6a74ce3db14b0 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -216,6 +216,11 @@ + + org.apache.parquet + parquet-column + + org.apache.spark diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml index c67621fbb9a35..c7de8e77fefd9 100644 --- a/hudi-client/hudi-client-common/pom.xml +++ b/hudi-client/hudi-client-common/pom.xml @@ -59,6 +59,11 @@ parquet-avro + + org.apache.parquet + parquet-column + + io.dropwizard.metrics diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml index af97f68f341e7..af0bb9009e50e 100644 --- a/hudi-client/hudi-java-client/pom.xml +++ b/hudi-client/hudi-java-client/pom.xml @@ -43,6 +43,11 @@ parquet-avro + + org.apache.parquet + parquet-column + + org.apache.hudi diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 71326e7504e62..682f8675f0f95 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -119,6 +119,11 @@ parquet-avro + + org.apache.parquet + parquet-column + + org.apache.orc diff --git a/hudi-examples/pom.xml b/hudi-examples/pom.xml index 6f5c5a21c61ad..52279c46ce55a 100644 --- a/hudi-examples/pom.xml +++ b/hudi-examples/pom.xml @@ -200,6 +200,12 @@ ${parquet.version} + + org.apache.parquet + parquet-column + ${parquet.version} + + org.apache.avro diff --git a/hudi-hadoop-mr/pom.xml b/hudi-hadoop-mr/pom.xml index a33b2e2925604..60b53d5a47c57 100644 --- a/hudi-hadoop-mr/pom.xml +++ b/hudi-hadoop-mr/pom.xml @@ -49,6 +49,11 @@ parquet-avro + + org.apache.parquet + parquet-column + + org.apache.hadoop diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index 4adc003c66d86..00872cbba10d0 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -86,6 +86,7 @@ org.apache.httpcomponents:fluent-hc org.antlr:stringtemplate org.apache.parquet:parquet-avro + org.apache.parquet:parquet-column com.twitter:bijection-avro_${scala.binary.version} com.twitter:bijection-core_${scala.binary.version} @@ -263,7 +264,11 @@ org.apache.parquet parquet-avro - compile + + + + org.apache.parquet + parquet-column diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index 148c20514e7ba..eb1fcb5bb803e 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -89,6 +89,7 @@ org.apache.httpcomponents:fluent-hc org.antlr:stringtemplate org.apache.parquet:parquet-avro + org.apache.parquet:parquet-column com.twitter:bijection-avro_${scala.binary.version} com.twitter:bijection-core_${scala.binary.version} @@ -262,6 +263,12 @@ compile + + org.apache.parquet + parquetcolumn + compile + + ${hive.groupid} diff --git a/pom.xml b/pom.xml index 4f04a845ebf88..3b673e2a541d4 100644 --- a/pom.xml +++ b/pom.xml @@ -531,12 +531,14 @@ org.apache.parquet parquet-avro ${parquet.version} + provided org.apache.parquet parquet-column ${parquet.version} + provided From c17ca61aa8d41aed480fde22abc48e7b8c5d0aba Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 2 Nov 2021 14:13:46 +0800 Subject: [PATCH 16/37] HUDI-2370 --- packaging/hudi-utilities-bundle/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index eb1fcb5bb803e..756f9d42a1253 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -265,7 +265,7 @@ org.apache.parquet - parquetcolumn + parquet-column compile From 61156c4e958c1b20c3479a55ef71f2e11891398a Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 2 Nov 2021 19:59:02 +0800 Subject: [PATCH 17/37] HUDI-2370 --- hudi-integ-test/pom.xml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index 8015843581a38..c810de6ecb310 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -98,8 +98,13 @@ - org.apache.parquet - parquet-avro + org.apache.parquet + parquet-avro + + + + org.apache.parquet + parquet-column From a3677e66a1fb13c1a91d6beb977b00ddfdd6a51e Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 3 Nov 2021 11:15:01 +0800 Subject: [PATCH 18/37] HUDI-2370 --- hudi-integ-test/pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index c810de6ecb310..d45959318376e 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -375,6 +375,10 @@ org.eclipse.jetty * + + parquet-hadoop-bundle + org.apache.parquet + test From 624c64a620b67825474733e9b056ca275b4c01e2 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 3 Nov 2021 15:58:55 +0800 Subject: [PATCH 19/37] HUDI-2370 --- packaging/hudi-hadoop-mr-bundle/pom.xml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index 76ede4738956b..7e42fd828ba6f 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -68,6 +68,7 @@ org.apache.hudi:hudi-hadoop-mr org.apache.parquet:parquet-avro + org.apache.parquet:parquet-column com.esotericsoftware:kryo-shaded org.objenesis:objenesis com.esotericsoftware:minlog @@ -155,6 +156,12 @@ compile + + org.apache.parquet + parquet-column + compile + + org.apache.avro avro From 429f7a6654b886402b21e1ef22c3bad407e05daf Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 3 Nov 2021 17:29:26 +0800 Subject: [PATCH 20/37] test class exception --- .../io/storage/HoodieFileWriterFactory.java | 9 ++++ .../apache/hudi/io/storage/SystemUtils.java | 52 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index e88c34f608d3e..3a4c3a728f527 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -31,7 +31,10 @@ import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroSchemaConverter; +import org.apache.parquet.schema.Types; import java.io.IOException; @@ -41,6 +44,8 @@ public class HoodieFileWriterFactory { + private static final Logger LOG = LogManager.getLogger(HoodieFileWriterFactory.class); + public static HoodieFileWriter getFileWriter( String instantTime, Path path, HoodieTable hoodieTable, HoodieWriteConfig config, Schema schema, TaskContextSupplier taskContextSupplier) throws IOException { @@ -67,6 +72,10 @@ private static HoodieFi String instantTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable, TaskContextSupplier taskContextSupplier, boolean populateMetaFields, boolean enableBloomFilter) throws IOException { Option filter = enableBloomFilter ? Option.of(createBloomFilter(config)) : Option.empty(); + + LOG.error("--" + SystemUtils.getClassLocation(AvroSchemaConverter.class)); + LOG.error("--" + SystemUtils.getClassLocation(Types.class)); + HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter(hoodieTable.getHadoopConf()).convert(schema), schema, filter); HoodieAvroParquetConfig parquetConfig = new HoodieAvroParquetConfig(writeSupport, config.getParquetCompressionCodec(), diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java new file mode 100644 index 0000000000000..2fa5132a5c6da --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java @@ -0,0 +1,52 @@ +package org.apache.hudi.io.storage; + +import java.io.File; +import java.net.MalformedURLException; +import java.net.URL; +import java.security.CodeSource; +import java.security.ProtectionDomain; + +/** + * system util. + */ +public class SystemUtils { + + public static URL getClassLocation(final Class cls) { + + if (cls == null) { + throw new IllegalArgumentException("null input: cls"); + } + + URL result = null; + final String clsAsResource = cls.getName().replace('.', '/').concat(".class"); + + final ProtectionDomain pd = cls.getProtectionDomain(); + + if (pd != null) { + final CodeSource cs = pd.getCodeSource(); + if (cs != null) { + result = cs.getLocation(); + } + + if (result != null) { + if ("file".equals(result.getProtocol())) { + try { + if (result.toExternalForm().endsWith(".jar") || result.toExternalForm().endsWith(".zip")) { + result = new URL("jar:".concat(result.toExternalForm()).concat("!/").concat(clsAsResource)); + } else if (new File(result.getFile()).isDirectory()) { + result = new URL(result, clsAsResource); + } + } catch (MalformedURLException ignore) { + System.out.println("-W"); + } + } + } + } + + if (result == null) { + final ClassLoader clsLoader = cls.getClassLoader(); + result = clsLoader != null ? clsLoader.getResource(clsAsResource) : ClassLoader.getSystemResource(clsAsResource); + } + return result; + } +} \ No newline at end of file From ce26ca89a86e91f41bc38dee2acebdc6b65cde06 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 3 Nov 2021 17:33:40 +0800 Subject: [PATCH 21/37] test class exception --- .../apache/hudi/io/storage/SystemUtils.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java index 2fa5132a5c6da..ac30485ed5146 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hudi.io.storage; import java.io.File; From 8bd01e8237b9c449d5cc532a2b2a6e6837e487cf Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 3 Nov 2021 17:52:21 +0800 Subject: [PATCH 22/37] test class exception --- .../org/apache/hudi/io/storage/HoodieFileWriterFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index 3a4c3a728f527..e92c297f96493 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -73,8 +73,8 @@ private static HoodieFi TaskContextSupplier taskContextSupplier, boolean populateMetaFields, boolean enableBloomFilter) throws IOException { Option filter = enableBloomFilter ? Option.of(createBloomFilter(config)) : Option.empty(); - LOG.error("--" + SystemUtils.getClassLocation(AvroSchemaConverter.class)); - LOG.error("--" + SystemUtils.getClassLocation(Types.class)); + LOG.error("--123" + SystemUtils.getClassLocation(AvroSchemaConverter.class)); + LOG.error("--123" + SystemUtils.getClassLocation(Types.class)); HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter(hoodieTable.getHadoopConf()).convert(schema), schema, filter); From 7c7ffa1f9980b39df89dc5e924113b39da705a8f Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Thu, 4 Nov 2021 10:34:26 +0800 Subject: [PATCH 23/37] test class exception --- .../apache/hudi/integ/ITTestHoodieSanity.java | 5 ++ .../org/apache/hudi/integ/SystemUtils.java | 70 +++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 hudi-integ-test/src/test/java/org/apache/hudi/integ/SystemUtils.java diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java index e432f9dc423f5..bce8c3db3b3e3 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java @@ -23,6 +23,9 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.io.storage.SystemUtils; +import org.apache.parquet.avro.AvroSchemaConverter; +import org.apache.parquet.schema.Types; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -89,6 +92,8 @@ public void testRunHoodieJavaAppOnNonPartitionedCOWTable() throws Exception { * console. */ public void testRunHoodieJavaAppOnSinglePartitionKeyMORTable() throws Exception { + LOG.error("--1234" + org.apache.hudi.io.storage.SystemUtils.getClassLocation(AvroSchemaConverter.class)); + LOG.error("--1234" + SystemUtils.getClassLocation(Types.class)); String hiveTableName = "docker_hoodie_single_partition_key_mor_test_" + HoodieActiveTimeline.createNewInstantTime(); testRunHoodieJavaApp(hiveTableName, HoodieTableType.MERGE_ON_READ.name(), PartitionType.SINGLE_KEY_PARTITIONED); diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/SystemUtils.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/SystemUtils.java new file mode 100644 index 0000000000000..6a8f9078fcfa1 --- /dev/null +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/SystemUtils.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.integ; + +import java.io.File; +import java.net.MalformedURLException; +import java.net.URL; +import java.security.CodeSource; +import java.security.ProtectionDomain; + +/** + * system util. + */ +public class SystemUtils { + + public static URL getClassLocation(final Class cls) { + + if (cls == null) { + throw new IllegalArgumentException("null input: cls"); + } + + URL result = null; + final String clsAsResource = cls.getName().replace('.', '/').concat(".class"); + + final ProtectionDomain pd = cls.getProtectionDomain(); + + if (pd != null) { + final CodeSource cs = pd.getCodeSource(); + if (cs != null) { + result = cs.getLocation(); + } + + if (result != null) { + if ("file".equals(result.getProtocol())) { + try { + if (result.toExternalForm().endsWith(".jar") || result.toExternalForm().endsWith(".zip")) { + result = new URL("jar:".concat(result.toExternalForm()).concat("!/").concat(clsAsResource)); + } else if (new File(result.getFile()).isDirectory()) { + result = new URL(result, clsAsResource); + } + } catch (MalformedURLException ignore) { + System.out.println("-W"); + } + } + } + } + + if (result == null) { + final ClassLoader clsLoader = cls.getClassLoader(); + result = clsLoader != null ? clsLoader.getResource(clsAsResource) : ClassLoader.getSystemResource(clsAsResource); + } + return result; + } +} \ No newline at end of file From 491b4ea02db257c8e10d0a8b39422e48d82b39f4 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Thu, 4 Nov 2021 14:40:12 +0800 Subject: [PATCH 24/37] test class exception --- hudi-integ-test/pom.xml | 4 ---- hudi-sync/hudi-dla-sync/pom.xml | 5 +++++ hudi-sync/hudi-hive-sync/pom.xml | 5 +++++ hudi-sync/hudi-sync-common/pom.xml | 4 ++++ hudi-utilities/pom.xml | 5 +++++ 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index d45959318376e..c810de6ecb310 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -375,10 +375,6 @@ org.eclipse.jetty * - - parquet-hadoop-bundle - org.apache.parquet - test diff --git a/hudi-sync/hudi-dla-sync/pom.xml b/hudi-sync/hudi-dla-sync/pom.xml index 0a4e1d2ba0c86..e065c2d9fdd96 100644 --- a/hudi-sync/hudi-dla-sync/pom.xml +++ b/hudi-sync/hudi-dla-sync/pom.xml @@ -107,6 +107,11 @@ parquet-avro + + org.apache.parquet + parquet-column + + org.apache.hadoop diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml index 74139e4e4b427..aeb8b492d65c3 100644 --- a/hudi-sync/hudi-hive-sync/pom.xml +++ b/hudi-sync/hudi-hive-sync/pom.xml @@ -64,6 +64,11 @@ parquet-avro + + org.apache.parquet + parquet-column + + com.beust jcommander diff --git a/hudi-sync/hudi-sync-common/pom.xml b/hudi-sync/hudi-sync-common/pom.xml index 5484290654a1e..113719b791579 100644 --- a/hudi-sync/hudi-sync-common/pom.xml +++ b/hudi-sync/hudi-sync-common/pom.xml @@ -41,6 +41,10 @@ org.apache.parquet parquet-avro + + org.apache.parquet + parquet-column + org.apache.hadoop hadoop-common diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index e72c82ef9bec2..8b384a3a18f8b 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -183,6 +183,11 @@ + + org.apache.parquet + parquet-column + + org.apache.spark From 4548198ee7a3ed5e74fd3c6f9d8df73fd4700d21 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Thu, 4 Nov 2021 16:25:13 +0800 Subject: [PATCH 25/37] test class exception --- hudi-integ-test/pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index c810de6ecb310..b0e3bcf405bf6 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -100,11 +100,15 @@ org.apache.parquet parquet-avro + ${parquet.version} + compile org.apache.parquet parquet-column + ${parquet.version} + compile From c646ab982ea9137bc84c53c8195dabc1833a34ea Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Thu, 4 Nov 2021 17:36:59 +0800 Subject: [PATCH 26/37] test class exception --- hudi-client/hudi-client-common/pom.xml | 4 ++++ hudi-integ-test/pom.xml | 16 ++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml index c7de8e77fefd9..56c397c171922 100644 --- a/hudi-client/hudi-client-common/pom.xml +++ b/hudi-client/hudi-client-common/pom.xml @@ -57,11 +57,15 @@ org.apache.parquet parquet-avro + ${parquet.version} + compile org.apache.parquet parquet-column + ${parquet.version} + compile diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index b0e3bcf405bf6..79ece33ec3a48 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -85,14 +85,14 @@ org.apache.curator * - - parquet-hadoop - org.apache.parquet - - - parquet-column - org.apache.parquet - + + parquet-hadoop + org.apache.parquet + + + parquet-column + org.apache.parquet + From f8efb1f54c2818f101db8ad90a4bc917a8035362 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Fri, 5 Nov 2021 11:15:22 +0800 Subject: [PATCH 27/37] test class exception --- packaging/hudi-spark-bundle/pom.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index e9bc522683cc7..46594f113f464 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -264,11 +264,13 @@ org.apache.parquet parquet-avro + compile org.apache.parquet parquet-column + compile From 661c2d45f1ce2eb1f973300092703a4f50c7736b Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Fri, 5 Nov 2021 15:02:09 +0800 Subject: [PATCH 28/37] test class exception --- hudi-utilities/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index 8b384a3a18f8b..74e3cd9428103 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -186,6 +186,7 @@ org.apache.parquet parquet-column + compile From 4ca88527a8eb5087f3cd43fe5ba0e3e2fcf3a382 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 25 Jan 2022 15:58:39 +0800 Subject: [PATCH 29/37] HUDI-2370 --- hudi-cli/pom.xml | 15 ---- hudi-client/hudi-client-common/pom.xml | 9 --- .../io/storage/HoodieFileWriterFactory.java | 7 -- .../apache/hudi/io/storage/SystemUtils.java | 70 ------------------- hudi-client/hudi-java-client/pom.xml | 5 -- hudi-client/hudi-spark-client/pom.xml | 15 ---- ...rkInsertOverwriteCommitActionExecutor.java | 2 +- hudi-common/pom.xml | 5 -- hudi-examples/pom.xml | 16 ----- hudi-flink/pom.xml | 8 +-- hudi-hadoop-mr/pom.xml | 5 -- hudi-integ-test/pom.xml | 23 ------ .../apache/hudi/integ/ITTestHoodieSanity.java | 5 -- hudi-spark-datasource/hudi-spark/pom.xml | 20 ------ .../hudi/MergeOnReadIncrementalRelation.scala | 1 - .../spark3/crypto}/TestParquetEncryption.java | 2 +- hudi-sync/hudi-dla-sync/pom.xml | 5 -- hudi-sync/hudi-hive-sync/pom.xml | 15 ---- hudi-sync/hudi-sync-common/pom.xml | 4 -- hudi-utilities/pom.xml | 20 ------ packaging/hudi-flink-bundle/pom.xml | 2 +- packaging/hudi-hadoop-mr-bundle/pom.xml | 13 ---- packaging/hudi-integ-test-bundle/pom.xml | 10 --- packaging/hudi-spark-bundle/pom.xml | 7 -- packaging/hudi-utilities-bundle/pom.xml | 7 -- pom.xml | 9 +-- 26 files changed, 5 insertions(+), 295 deletions(-) delete mode 100644 hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java rename hudi-spark-datasource/{hudi-spark/src/test/java/org/apache/hudi/crypot => hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto}/TestParquetEncryption.java (99%) diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index eaead9d15958f..29bdf85ab08c5 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -216,11 +216,6 @@ - - org.apache.parquet - parquet-column - - org.apache.spark @@ -229,16 +224,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - - parquet-column - org.apache.parquet - - - parquet-hadoop - org.apache.parquet - - org.apache.spark diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml index 1caa1c55192da..a9209f5534df8 100644 --- a/hudi-client/hudi-client-common/pom.xml +++ b/hudi-client/hudi-client-common/pom.xml @@ -62,15 +62,6 @@ org.apache.parquet parquet-avro - ${parquet.version} - compile - - - - org.apache.parquet - parquet-column - ${parquet.version} - compile diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index 1e9711b82ce45..4b0f9aabd8b4f 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -31,8 +31,6 @@ import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.Path; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.schema.Types; @@ -48,8 +46,6 @@ public class HoodieFileWriterFactory { - private static final Logger LOG = LogManager.getLogger(HoodieFileWriterFactory.class); - public static HoodieFileWriter getFileWriter( String instantTime, Path path, HoodieTable hoodieTable, HoodieWriteConfig config, Schema schema, TaskContextSupplier taskContextSupplier) throws IOException { @@ -77,9 +73,6 @@ private static HoodieFi TaskContextSupplier taskContextSupplier, boolean populateMetaFields, boolean enableBloomFilter) throws IOException { Option filter = enableBloomFilter ? Option.of(createBloomFilter(config)) : Option.empty(); - LOG.error("--123" + SystemUtils.getClassLocation(AvroSchemaConverter.class)); - LOG.error("--123" + SystemUtils.getClassLocation(Types.class)); - HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter(hoodieTable.getHadoopConf()).convert(schema), schema, filter); HoodieAvroParquetConfig parquetConfig = new HoodieAvroParquetConfig(writeSupport, config.getParquetCompressionCodec(), diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java deleted file mode 100644 index ac30485ed5146..0000000000000 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/SystemUtils.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.io.storage; - -import java.io.File; -import java.net.MalformedURLException; -import java.net.URL; -import java.security.CodeSource; -import java.security.ProtectionDomain; - -/** - * system util. - */ -public class SystemUtils { - - public static URL getClassLocation(final Class cls) { - - if (cls == null) { - throw new IllegalArgumentException("null input: cls"); - } - - URL result = null; - final String clsAsResource = cls.getName().replace('.', '/').concat(".class"); - - final ProtectionDomain pd = cls.getProtectionDomain(); - - if (pd != null) { - final CodeSource cs = pd.getCodeSource(); - if (cs != null) { - result = cs.getLocation(); - } - - if (result != null) { - if ("file".equals(result.getProtocol())) { - try { - if (result.toExternalForm().endsWith(".jar") || result.toExternalForm().endsWith(".zip")) { - result = new URL("jar:".concat(result.toExternalForm()).concat("!/").concat(clsAsResource)); - } else if (new File(result.getFile()).isDirectory()) { - result = new URL(result, clsAsResource); - } - } catch (MalformedURLException ignore) { - System.out.println("-W"); - } - } - } - } - - if (result == null) { - final ClassLoader clsLoader = cls.getClassLoader(); - result = clsLoader != null ? clsLoader.getResource(clsAsResource) : ClassLoader.getSystemResource(clsAsResource); - } - return result; - } -} \ No newline at end of file diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml index 04a08bfe7621d..3471bfb8ba366 100644 --- a/hudi-client/hudi-java-client/pom.xml +++ b/hudi-client/hudi-java-client/pom.xml @@ -43,11 +43,6 @@ parquet-avro - - org.apache.parquet - parquet-column - - org.apache.hudi diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index ce3c3c3af719c..e4a8fd56b6a65 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -52,16 +52,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - - parquet-column - org.apache.parquet - - - parquet-hadoop - org.apache.parquet - - org.apache.spark @@ -75,11 +65,6 @@ parquet-avro - - org.apache.parquet - parquet-column - - org.apache.hudi diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java index 7a3549c9e9c41..39af20474bd0f 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java @@ -60,7 +60,7 @@ public HoodieWriteMetadata> execute() { return SparkWriteHelper.newInstance().write(instantTime, inputRecordsRDD, context, table, config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, operationType); } - +git @Override protected Partitioner getPartitioner(WorkloadProfile profile) { return table.getStorageLayout().layoutPartitionerClass() diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 0131fef659061..c20ff22f8c9ee 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -119,11 +119,6 @@ parquet-avro - - org.apache.parquet - parquet-column - - org.apache.orc diff --git a/hudi-examples/pom.xml b/hudi-examples/pom.xml index 8e7bdd9cf217a..2ea284f203209 100644 --- a/hudi-examples/pom.xml +++ b/hudi-examples/pom.xml @@ -177,16 +177,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - - parquet-column - org.apache.parquet - - - parquet-hadoop - org.apache.parquet - - org.apache.spark @@ -200,12 +190,6 @@ ${parquet.version} - - org.apache.parquet - parquet-column - ${parquet.version} - - org.apache.avro diff --git a/hudi-flink/pom.xml b/hudi-flink/pom.xml index c707cc2515dd8..c8fac38be5b18 100644 --- a/hudi-flink/pom.xml +++ b/hudi-flink/pom.xml @@ -30,7 +30,7 @@ ${project.parent.basedir} - 1.12.0 + 1.11.1 @@ -149,12 +149,6 @@ flink-parquet_${scala.binary.version} ${flink.version} provided - - - parquet-hadoop - org.apache.parquet - - org.apache.flink diff --git a/hudi-hadoop-mr/pom.xml b/hudi-hadoop-mr/pom.xml index 57d506b6017d0..bf87bfaa36a81 100644 --- a/hudi-hadoop-mr/pom.xml +++ b/hudi-hadoop-mr/pom.xml @@ -49,11 +49,6 @@ parquet-avro - - org.apache.parquet - parquet-column - - org.apache.hadoop diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index 212c752eb4687..7ca976f9f80a6 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -85,32 +85,9 @@ org.apache.curator * - - parquet-hadoop - org.apache.parquet - - - parquet-column - org.apache.parquet - - - - org.apache.parquet - parquet-avro - ${parquet.version} - compile - - - - org.apache.parquet - parquet-column - ${parquet.version} - compile - - org.apache.hudi diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java index bce8c3db3b3e3..e432f9dc423f5 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java @@ -23,9 +23,6 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.io.storage.SystemUtils; -import org.apache.parquet.avro.AvroSchemaConverter; -import org.apache.parquet.schema.Types; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -92,8 +89,6 @@ public void testRunHoodieJavaAppOnNonPartitionedCOWTable() throws Exception { * console. */ public void testRunHoodieJavaAppOnSinglePartitionKeyMORTable() throws Exception { - LOG.error("--1234" + org.apache.hudi.io.storage.SystemUtils.getClassLocation(AvroSchemaConverter.class)); - LOG.error("--1234" + SystemUtils.getClassLocation(Types.class)); String hiveTableName = "docker_hoodie_single_partition_key_mor_test_" + HoodieActiveTimeline.createNewInstantTime(); testRunHoodieJavaApp(hiveTableName, HoodieTableType.MERGE_ON_READ.name(), PartitionType.SINGLE_KEY_PARTITIONED); diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml index 6d5925e7994d8..e20f4ad48c079 100644 --- a/hudi-spark-datasource/hudi-spark/pom.xml +++ b/hudi-spark-datasource/hudi-spark/pom.xml @@ -291,16 +291,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - - parquet-column - org.apache.parquet - - - parquet-hadoop - org.apache.parquet - - @@ -313,16 +303,6 @@ spark-sql_${scala.binary.version} tests test - - - parquet-column - org.apache.parquet - - - parquet-hadoop - org.apache.parquet - - org.apache.spark diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index 1d819d78efead..b4a9800d994b9 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -114,7 +114,6 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") - sqlContext.sparkSession.sessionState.conf.setConfString("parquet.filter.columnindex.enabled", "false") val pushDownFilter = { val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD) val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp) diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/crypot/TestParquetEncryption.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java similarity index 99% rename from hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/crypot/TestParquetEncryption.java rename to hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java index f36d2409fa9a5..48bf9a4a5c50a 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/crypot/TestParquetEncryption.java +++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hudi.crypot; +package org.apache.hudi.spark3.crypto; import org.apache.hudi.crypot.kms.InMemoryKMS; import org.apache.hudi.testutils.HoodieClientTestBase; diff --git a/hudi-sync/hudi-dla-sync/pom.xml b/hudi-sync/hudi-dla-sync/pom.xml index 1c32ae8f09c18..afb5717318f99 100644 --- a/hudi-sync/hudi-dla-sync/pom.xml +++ b/hudi-sync/hudi-dla-sync/pom.xml @@ -107,11 +107,6 @@ parquet-avro - - org.apache.parquet - parquet-column - - org.apache.hadoop diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml index bd332290b3651..7cc51a3068132 100644 --- a/hudi-sync/hudi-hive-sync/pom.xml +++ b/hudi-sync/hudi-hive-sync/pom.xml @@ -64,11 +64,6 @@ parquet-avro - - org.apache.parquet - parquet-column - - com.beust jcommander @@ -147,16 +142,6 @@ org.apache.spark spark-sql_${scala.binary.version} test - - - parquet-column - org.apache.parquet - - - parquet-hadoop - org.apache.parquet - - diff --git a/hudi-sync/hudi-sync-common/pom.xml b/hudi-sync/hudi-sync-common/pom.xml index 2a58a3903fa26..1f1abb4f177f1 100644 --- a/hudi-sync/hudi-sync-common/pom.xml +++ b/hudi-sync/hudi-sync-common/pom.xml @@ -41,10 +41,6 @@ org.apache.parquet parquet-avro - - org.apache.parquet - parquet-column - org.apache.hadoop hadoop-common diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index 43c0ad49bf690..40acaac77082b 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -200,12 +200,6 @@ - - org.apache.parquet - parquet-column - compile - - org.apache.spark @@ -225,20 +219,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - - javax.servlet - * - - - parquet-hadoop - org.apache.parquet - - - parquet-column - org.apache.parquet - - diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index 1962e3ec33c92..b31113a3748d1 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -35,7 +35,7 @@ org.apache.hudi. 3.1.0 - 1.12.0 + 1.11.1 2.3.1 0.9.3 diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index d04efb1d66e09..23e5a0b46f208 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -68,7 +68,6 @@ org.apache.hudi:hudi-hadoop-mr org.apache.parquet:parquet-avro - org.apache.parquet:parquet-column com.esotericsoftware:kryo-shaded org.objenesis:objenesis com.esotericsoftware:minlog @@ -150,18 +149,6 @@ - - org.apache.parquet - parquet-avro - compile - - - - org.apache.parquet - parquet-column - compile - - org.apache.avro avro diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml index b8d1f0b3d253b..d90f6acb01898 100644 --- a/packaging/hudi-integ-test-bundle/pom.xml +++ b/packaging/hudi-integ-test-bundle/pom.xml @@ -544,16 +544,6 @@ org.apache.spark spark-sql_${scala.binary.version} - - - parquet-column - org.apache.parquet - - - parquet-hadoop - org.apache.parquet - - diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index 3bcd56a90889f..7ab8c21700229 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -87,7 +87,6 @@ org.apache.httpcomponents:fluent-hc org.antlr:stringtemplate org.apache.parquet:parquet-avro - org.apache.parquet:parquet-column com.github.davidmoten:guava-mini com.github.davidmoten:hilbert-curve @@ -274,12 +273,6 @@ compile - - org.apache.parquet - parquet-column - compile - - ${hive.groupid} diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index 7caef7a13ff64..b9e372ca2c770 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -112,7 +112,6 @@ org.apache.httpcomponents:fluent-hc org.antlr:stringtemplate org.apache.parquet:parquet-avro - org.apache.parquet:parquet-column com.amazonaws:dynamodb-lock-client com.amazonaws:aws-java-sdk-dynamodb @@ -293,12 +292,6 @@ compile - - org.apache.parquet - parquet-column - compile - - ${hive.groupid} diff --git a/pom.xml b/pom.xml index e462b47438e64..e48ad80fc6d71 100644 --- a/pom.xml +++ b/pom.xml @@ -97,7 +97,7 @@ 2.8.1 5.3.4 2.17 - 1.12.0 + 1.10.1 5.7.0-M1 5.7.0-M1 1.7.0-M1 @@ -555,13 +555,6 @@ provided - - org.apache.parquet - parquet-column - ${parquet.version} - provided - - org.apache.spark From f85aeac825205ef91e31ca4a12183c1501d12d9d Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 25 Jan 2022 16:03:07 +0800 Subject: [PATCH 30/37] HUDI-2370 --- .../io/storage/HoodieFileWriterFactory.java | 2 - ...rkInsertOverwriteCommitActionExecutor.java | 2 +- .../org/apache/hudi/integ/SystemUtils.java | 70 ------------------- hudi-utilities/pom.xml | 6 ++ packaging/hudi-flink-bundle/pom.xml | 6 -- packaging/hudi-hadoop-mr-bundle/pom.xml | 6 ++ 6 files changed, 13 insertions(+), 79 deletions(-) delete mode 100644 hudi-integ-test/src/test/java/org/apache/hudi/integ/SystemUtils.java diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index 4b0f9aabd8b4f..0b6afd4d28b92 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -32,7 +32,6 @@ import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.Path; import org.apache.parquet.avro.AvroSchemaConverter; -import org.apache.parquet.schema.Types; import java.io.IOException; @@ -72,7 +71,6 @@ private static HoodieFi String instantTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable, TaskContextSupplier taskContextSupplier, boolean populateMetaFields, boolean enableBloomFilter) throws IOException { Option filter = enableBloomFilter ? Option.of(createBloomFilter(config)) : Option.empty(); - HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter(hoodieTable.getHadoopConf()).convert(schema), schema, filter); HoodieAvroParquetConfig parquetConfig = new HoodieAvroParquetConfig(writeSupport, config.getParquetCompressionCodec(), diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java index 39af20474bd0f..7a3549c9e9c41 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java @@ -60,7 +60,7 @@ public HoodieWriteMetadata> execute() { return SparkWriteHelper.newInstance().write(instantTime, inputRecordsRDD, context, table, config.shouldCombineBeforeInsert(), config.getInsertShuffleParallelism(), this, operationType); } -git + @Override protected Partitioner getPartitioner(WorkloadProfile profile) { return table.getStorageLayout().layoutPartitionerClass() diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/SystemUtils.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/SystemUtils.java deleted file mode 100644 index 6a8f9078fcfa1..0000000000000 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/SystemUtils.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.integ; - -import java.io.File; -import java.net.MalformedURLException; -import java.net.URL; -import java.security.CodeSource; -import java.security.ProtectionDomain; - -/** - * system util. - */ -public class SystemUtils { - - public static URL getClassLocation(final Class cls) { - - if (cls == null) { - throw new IllegalArgumentException("null input: cls"); - } - - URL result = null; - final String clsAsResource = cls.getName().replace('.', '/').concat(".class"); - - final ProtectionDomain pd = cls.getProtectionDomain(); - - if (pd != null) { - final CodeSource cs = pd.getCodeSource(); - if (cs != null) { - result = cs.getLocation(); - } - - if (result != null) { - if ("file".equals(result.getProtocol())) { - try { - if (result.toExternalForm().endsWith(".jar") || result.toExternalForm().endsWith(".zip")) { - result = new URL("jar:".concat(result.toExternalForm()).concat("!/").concat(clsAsResource)); - } else if (new File(result.getFile()).isDirectory()) { - result = new URL(result, clsAsResource); - } - } catch (MalformedURLException ignore) { - System.out.println("-W"); - } - } - } - } - - if (result == null) { - final ClassLoader clsLoader = cls.getClassLoader(); - result = clsLoader != null ? clsLoader.getResource(clsAsResource) : ClassLoader.getSystemResource(clsAsResource); - } - return result; - } -} \ No newline at end of file diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index 40acaac77082b..65a520f964e1b 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -219,6 +219,12 @@ org.apache.spark spark-sql_${scala.binary.version} + + + javax.servlet + * + + diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index b31113a3748d1..bbc9620b4bd2a 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -342,12 +342,6 @@ flink-parquet_${scala.binary.version} ${flink.version} compile - - - parquet-hadoop - org.apache.parquet - - org.apache.flink diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index 23e5a0b46f208..a426e74df08d6 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -149,6 +149,12 @@ + + org.apache.parquet + parquet-avro + compile + + org.apache.avro avro From 8babbfa83c9957b679b1a807b3e88c54c6fb02ed Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 25 Jan 2022 16:08:04 +0800 Subject: [PATCH 31/37] HUDI-2370 --- .../java/org/apache/hudi/spark3/crypto}/InMemoryKMS.java | 2 +- .../apache/hudi/spark3/crypto/TestParquetEncryption.java | 1 - hudi-utilities/pom.xml | 8 ++++---- 3 files changed, 5 insertions(+), 6 deletions(-) rename {hudi-common/src/main/java/org/apache/hudi/crypot/kms => hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto}/InMemoryKMS.java (99%) diff --git a/hudi-common/src/main/java/org/apache/hudi/crypot/kms/InMemoryKMS.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java similarity index 99% rename from hudi-common/src/main/java/org/apache/hudi/crypot/kms/InMemoryKMS.java rename to hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java index 44421e1cc81c8..4347d260ac99e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/crypot/kms/InMemoryKMS.java +++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hudi.crypot.kms; +package org.apache.hudi.spark3.crypto; import org.apache.hadoop.conf.Configuration; import org.apache.hudi.common.config.ConfigProperty; diff --git a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java index 48bf9a4a5c50a..72023a64b28c1 100644 --- a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java +++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java @@ -18,7 +18,6 @@ package org.apache.hudi.spark3.crypto; -import org.apache.hudi.crypot.kms.InMemoryKMS; import org.apache.hudi.testutils.HoodieClientTestBase; import org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory; import org.apache.spark.api.java.JavaSparkContext; diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index 65a520f964e1b..cb3926548f226 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -220,10 +220,10 @@ org.apache.spark spark-sql_${scala.binary.version} - - javax.servlet - * - + + javax.servlet + * + From a4688a962fedeeab27ce030396ce86622e6083d2 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 25 Jan 2022 16:12:06 +0800 Subject: [PATCH 32/37] HUDI-2370 --- hudi-utilities/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index cb3926548f226..2e68039c1322d 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -224,7 +224,7 @@ javax.servlet * - + From 1981bf744932c4a9dcffb8bdcf16432345df8ebb Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Tue, 25 Jan 2022 16:23:05 +0800 Subject: [PATCH 33/37] HUDI-2370 --- hudi-spark-datasource/hudi-spark3/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hudi-spark-datasource/hudi-spark3/pom.xml b/hudi-spark-datasource/hudi-spark3/pom.xml index ca09d8359f96b..26504b134cba4 100644 --- a/hudi-spark-datasource/hudi-spark3/pom.xml +++ b/hudi-spark-datasource/hudi-spark3/pom.xml @@ -161,6 +161,12 @@ true + + org.apache.parquet + parquet-hadoop + 1.12.0 + + com.fasterxml.jackson.core jackson-databind From 26c72bc763d3364c8ce6a39b62942090416eee83 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Mon, 7 Feb 2022 10:48:48 +0800 Subject: [PATCH 34/37] HUDI-2370 --- .../test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java | 1 + .../org/apache/hudi/spark3/crypto/TestParquetEncryption.java | 3 +++ 2 files changed, 4 insertions(+) diff --git a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java index 4347d260ac99e..2d876491ccabb 100644 --- a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java +++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java @@ -35,6 +35,7 @@ /** * This is a mock class, built for testing only. Don't use it as an example of KmsClient implementation. * (org.apache.parquet.crypto.keytools.samples.VaultClient is the sample implementation). + * This function is only recommended for use in spark3.2+. */ public class InMemoryKMS implements KmsClient { private static final Logger LOG = LoggerFactory.getLogger(InMemoryKMS.class); diff --git a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java index 72023a64b28c1..70dcf3a92dd85 100644 --- a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java +++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java @@ -36,6 +36,9 @@ import static org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings; +/** + * This function is only recommended for use in spark3.2+. + */ public class TestParquetEncryption extends HoodieClientTestBase { private SparkSession spark; From cce33658e4da93cdcfa7ba71f5e6c848094eb080 Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 9 Feb 2022 14:38:43 +0800 Subject: [PATCH 35/37] HUDI-2370 --- .../src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java index 2d876491ccabb..6d108388b6b3e 100644 --- a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java +++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java @@ -36,6 +36,7 @@ * This is a mock class, built for testing only. Don't use it as an example of KmsClient implementation. * (org.apache.parquet.crypto.keytools.samples.VaultClient is the sample implementation). * This function is only recommended for use in spark3.2+. + * The encryption function is experimental. */ public class InMemoryKMS implements KmsClient { private static final Logger LOG = LoggerFactory.getLogger(InMemoryKMS.class); From f74353dc3cb16970472fa230aef50e0889a5ef2d Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 9 Feb 2022 14:56:48 +0800 Subject: [PATCH 36/37] HUDI-2370 --- .../test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java index 6d108388b6b3e..b0873b1cff87a 100644 --- a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java +++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java @@ -36,7 +36,7 @@ * This is a mock class, built for testing only. Don't use it as an example of KmsClient implementation. * (org.apache.parquet.crypto.keytools.samples.VaultClient is the sample implementation). * This function is only recommended for use in spark3.2+. - * The encryption function is experimental. + * The encryption function is experimental, because InMemoryKMS is not reliable. */ public class InMemoryKMS implements KmsClient { private static final Logger LOG = LoggerFactory.getLogger(InMemoryKMS.class); From c36dc87a9741fdad311c6bb78d7b36f9681ef75b Mon Sep 17 00:00:00 2001 From: liujinhui1994 <965147871@qq.com> Date: Wed, 9 Feb 2022 14:58:09 +0800 Subject: [PATCH 37/37] HUDI-2370 --- .../test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java | 2 +- .../org/apache/hudi/spark3/crypto/TestParquetEncryption.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java index b0873b1cff87a..e1f4e03dde604 100644 --- a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java +++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/InMemoryKMS.java @@ -42,7 +42,7 @@ public class InMemoryKMS implements KmsClient { private static final Logger LOG = LoggerFactory.getLogger(InMemoryKMS.class); private static final ConfigProperty KEY_LIST_PROPERTY_NAME = ConfigProperty - .key("hoodie.parquet.encryption.key.list") + .key("hoodie.parquet.encryption.keys") .noDefaultValue() .withDocumentation("hudi parquet encryption key list, example: keyName_1:value_1, keyName_2:value_2"); diff --git a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java index 70dcf3a92dd85..dd885b3084415 100644 --- a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java +++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/crypto/TestParquetEncryption.java @@ -78,7 +78,7 @@ public void testEncryption() { jsc.hadoopConfiguration().set("parquet.encryption.kms.client.class", InMemoryKMS.class.getName()); jsc.hadoopConfiguration().set("parquet.encryption.footer.key", "k1"); jsc.hadoopConfiguration().set("parquet.encryption.column.keys", "k2:rider,_row_key"); - jsc.hadoopConfiguration().set("hoodie.parquet.encryption.key.list", "k1:AAECAwQFBgcICQoLDA0ODw==, k2:AAECAAECAAECAAECAAECAA=="); + jsc.hadoopConfiguration().set("hoodie.parquet.encryption.keys", "k1:AAECAwQFBgcICQoLDA0ODw==, k2:AAECAAECAAECAAECAAECAA=="); List records1 = recordsToStrings(dataGen.generateInserts("000", 100)); Dataset inputDF1 = spark.read().json(jsc.parallelize(records1, 2)); @@ -97,7 +97,7 @@ public void testEncryption() { //2 has footer key, has column key jsc.hadoopConfiguration().set("parquet.crypto.factory.class", PropertiesDrivenCryptoFactory.class.getName()); jsc.hadoopConfiguration().set("parquet.encryption.kms.client.class", InMemoryKMS.class.getName()); - jsc.hadoopConfiguration().set("hoodie.parquet.encryption.key.list", "k1:AAECAwQFBgcICQoLDA0ODw==, k2:AAECAAECAAECAAECAAECAA=="); + jsc.hadoopConfiguration().set("hoodie.parquet.encryption.keys", "k1:AAECAwQFBgcICQoLDA0ODw==, k2:AAECAAECAAECAAECAAECAA=="); Assertions.assertEquals(100, spark.read().format("org.apache.hudi").load(basePath).count()); Assertions.assertDoesNotThrow(() -> spark.read().format("org.apache.hudi").load(basePath).select("rider").show(1)); }