diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala
index 332455ea217ef..680897d7ee533 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala
@@ -171,7 +171,7 @@ trait ProvidesHoodieConfig extends Logging {
 
     logInfo(s"Insert statement use write operation type: $operation, payloadClass: $payloadClassName")
 
-    withSparkConf(sparkSession, catalogProperties) {
+    withSparkConf(sparkSession, catalogProperties ++ extraOptions) {
       Map(
         "path" -> path,
         TABLE_TYPE.key -> tableType,
diff --git a/packaging/bundle-validation/spark_hadoop_mr/validateCtasTableType.scala b/packaging/bundle-validation/spark_hadoop_mr/validateCtasTableType.scala
new file mode 100644
index 0000000000000..80cb44389e95d
--- /dev/null
+++ b/packaging/bundle-validation/spark_hadoop_mr/validateCtasTableType.scala
@@ -0,0 +1,60 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
+
+val database = "default"
+
+if (HoodieSqlCommonUtils.isUsingHiveCatalog(spark)) {
+  val tableName1 = "test_ctas_1"
+  spark.sql(
+    s"""
+       | create table $tableName1 using hudi
+       | tblproperties(
+       |  primaryKey = 'id'
+       | )
+       | AS
+       | select 1 as id, 'a1' as name, 10 as price, 1000 as ts
+     """.stripMargin)
+  if (spark.catalog.getTable(tableName1).tableType.equals("MANAGED")) {
+    val tableName2 = "test_ctas_2"
+    spark.sql(
+      s"""
+         | create table $tableName2 using hudi
+         | tblproperties(
+         |  primaryKey = 'id'
+         | )
+         | location '/tmp/$tableName2'
+         | AS
+         | select 1 as id, 'a1' as name, 10 as price, 1000 as ts
+       """.stripMargin)
+    if (spark.catalog.getTable(tableName2).tableType.equals("EXTERNAL")) {
+      System.out.println("CTAS hive table type validation passed.")
+      System.exit(0)
+    } else {
+      System.err.println(s"CTAS hive table type validation failed:\n\tThe table type of $database.$tableName2 should be EXTERNAL")
+      System.exit(1)
+    }
+  } else {
+    System.err.println(s"CTAS hive table type validation failed:\n\tThe table type of $database.$tableName1 should be MANAGED")
+    System.exit(1)
+  }
+} else {
+  System.err.println(s"CTAS hive table type validation failed:\n\tSpark should use Hive as the session catalog")
+  System.exit(1)
+}
+
diff --git a/packaging/bundle-validation/validate.sh b/packaging/bundle-validation/validate.sh
index 7d40228651cb5..8584315dfce92 100755
--- a/packaging/bundle-validation/validate.sh
+++ b/packaging/bundle-validation/validate.sh
@@ -75,6 +75,27 @@ test_spark_hadoop_mr_bundles () {
 
   echo "::warning::validate.sh spark & hadoop-mr bundles validation was successful."
 }
+##
+# Function to test the hive table type created by CTAS with hive sync in the spark bundle.
+#
+# env vars (defined in container):
+#   HIVE_HOME: path to the hive directory
+#   DERBY_HOME: path to the derby directory
+#   SPARK_HOME: path to the spark directory
+##
+test_CTAS_hiveTableType () {
+  echo "::warning::validate.sh setting up hive metastore for CTAS hive table type validation"
+
+  $DERBY_HOME/bin/startNetworkServer -h 0.0.0.0 &
+  $HIVE_HOME/bin/hiveserver2 &
+  echo "::warning::validate.sh hive metastore setup complete. Testing"
+  $SPARK_HOME/bin/spark-shell --jars $JARS_DIR/spark.jar < $WORKDIR/spark_hadoop_mr/validateCtasTableType.scala
+  if [ "$?" -ne 0 ]; then
+    echo "::error::validate.sh failed to validate CTAS hive table type"
+    exit 1
+  fi
+  echo "::warning::validate.sh CTAS hive table type validation successful"
+}
 
 ##
 # Function to test the utilities bundle and utilities slim bundle + spark bundle.
@@ -168,6 +189,11 @@ if [ "$?" -ne 0 ]; then
 fi
 echo "::warning::validate.sh done validating spark & hadoop-mr bundle"
 
+test_CTAS_hiveTableType
+if [ "$?" -ne 0 ]; then
+  exit 1
+fi
+
 if [[ $SPARK_HOME == *"spark-2.4"* ]] || [[ $SPARK_HOME == *"spark-3.1"* ]]
 then
   echo "::warning::validate.sh validating utilities bundle"