diff --git a/plugins/spark/v3.5/regtests/README.md b/plugins/spark/v3.5/regtests/README.md
index 06a0ccd13b..de3355204c 100755
--- a/plugins/spark/v3.5/regtests/README.md
+++ b/plugins/spark/v3.5/regtests/README.md
@@ -84,3 +84,48 @@ Note: the regression tests expect Polaris to run with certain options, e.g. with
 storage, default realm `POLARIS` and root credentials `root:secret`; if you run the above command,
 this will be the case. If you run Polaris in a different way, make sure that
 Polaris is configured appropriately.
+
+## Running Specific Test Suites
+
+By default, `run.sh` auto-discovers and executes all test suites in the `suites/` directory.
+To run a single suite, set the `REGTEST_SUITE` environment variable to the suite name (with or without the `.sh` extension):
+
+```bash
+# Run only Delta tests
+env POLARIS_HOST=localhost REGTEST_SUITE=spark_sql_delta ./plugins/spark/v3.5/regtests/run.sh
+
+# Run only Hudi tests
+env POLARIS_HOST=localhost REGTEST_SUITE=spark_sql_hudi ./plugins/spark/v3.5/regtests/run.sh
+```
+
+## Adding a New Test Suite
+
+Test suites are auto-discovered from the `suites/` directory. To add a new suite:
+
+1. Create `suites/<test_name>_<table_format>.sh` (must be executable)
+2. Create `suites/<test_name>_<table_format>.ref` (expected output)
+3. The table format is parsed automatically from the portion after the last `_` (e.g. `delta` from `spark_sql_delta.sh`)
+4. Supported table formats: `delta`, `hudi`
+
+## Table Format Support
+
+The regression tests support multiple table formats through the `--tableFormat` parameter in `setup.sh`:
+
+- **Delta** (default): Uses `DeltaCatalog` for `spark_catalog`. Tests both Iceberg and Delta tables.
+- **Hudi**: Uses `HoodieCatalog` for `spark_catalog`. Tests both Iceberg and Hudi tables.
+
+Each test suite is isolated with its own Spark configuration and catalog setup. The `spark_catalog`
+can only be configured with one catalog implementation at a time, which is why separate test suites
+are needed for Delta and Hudi formats.
+
+### Manual Setup
+
+You can manually run `setup.sh` with a specific table format:
+
+```bash
+# Setup for Delta tables (default)
+./plugins/spark/v3.5/regtests/setup.sh --sparkVersion 3.5.6 --scalaVersion 2.12 --polarisVersion 0.1.0 --tableFormat delta
+
+# Setup for Hudi tables
+./plugins/spark/v3.5/regtests/setup.sh --sparkVersion 3.5.6 --scalaVersion 2.12 --polarisVersion 0.1.0 --tableFormat hudi
+```
diff --git a/plugins/spark/v3.5/regtests/run.sh b/plugins/spark/v3.5/regtests/run.sh
index cc84c0411c..d925f7e75c 100755
--- a/plugins/spark/v3.5/regtests/run.sh
+++ b/plugins/spark/v3.5/regtests/run.sh
@@ -70,6 +70,52 @@ SPARK_VERSION="3.5.6"
 SPARK_SHELL_OPTIONS=("PACKAGE" "JAR")
 
+# Auto-discover test suites from the suites/ directory
+# Test files must follow the naming convention: <test_name>_<table_format>.sh
+SUITES_DIR="${SCRIPT_DIR}/suites"
+
+if [[ ! -d "$SUITES_DIR" ]]; then
+  logred "Error: Test suites directory not found: ${SUITES_DIR}"
+  exit 1
+fi
+
+# Parses a test suite filename (e.g. "spark_sql_delta.sh") to extract:
+# TABLE_FORMAT - the table format suffix after the last '_' (e.g. "delta")
+# TEST_SHORTNAME - the base name without the .sh extension (e.g. 
"spark_sql_delta") +# TEST_FILE - the full path to the suite file under SUITES_DIR +parse_test_suite() { + local filename="$1" + local base="${filename%.sh}" + TABLE_FORMAT="${base##*_}" + TEST_SHORTNAME="${base}" + TEST_FILE="${SUITES_DIR}/${filename}" +} + +declare -a TEST_SUITES=() +for test_file in "${SUITES_DIR}"/*.sh; do + [[ -f "$test_file" ]] || continue + TEST_SUITES+=("$(basename "$test_file")") +done + +if [[ ${#TEST_SUITES[@]} -eq 0 ]]; then + logred "Error: No test suites found in ${SUITES_DIR}" + exit 1 +fi + +# Allow running specific test via environment variable +echo "REGTEST_SUITE=${REGTEST_SUITE}" +if [[ -n "$REGTEST_SUITE" ]]; then + REGTEST_SUITE="${REGTEST_SUITE%.sh}" + SUITE_FILE="${REGTEST_SUITE}.sh" + if [[ ! -f "${SUITES_DIR}/${SUITE_FILE}" ]]; then + logred "Error: Test suite not found: ${SUITES_DIR}/${SUITE_FILE}" + exit 1 + fi + echo "Overriding TEST_SUITES to run only: ${REGTEST_SUITE}" + TEST_SUITES=("${SUITE_FILE}") +fi +echo "Will run test suites: ${TEST_SUITES[@]}" + for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}, SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}" # find the project jar @@ -89,55 +135,64 @@ for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do fi for SPARK_SHELL_OPTION in "${SPARK_SHELL_OPTIONS[@]}"; do - # clean up the default configuration if exists - if [ -f "${SPARK_HOME}" ]; then - SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf" - if [ -f ${SPARK_CONF} ]; then - rm ${SPARK_CONF} - fi - fi - - if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then - # run the setup without jar configuration - source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} - else - source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH} - fi - - # run the spark_sql test - loginfo "Starting test spark_sql.sh" - - TEST_FILE="spark_sql.sh" - TEST_SHORTNAME="spark_sql" - TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}" - TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr" - TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout" - - mkdir -p ${TEST_TMPDIR} - if (( ${VERBOSE} )); then - ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT} - else - ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT} - fi - loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}" - - TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref" - if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then - loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}" - else - logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}" - echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh - echo "meld ${TEST_STDOUT} ${TEST_REF}" >> ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh - chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh - logred "To compare and fix diffs (if 'meld' installed): ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh" - logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}" - logred "See stderr from test run for additional diagnostics: ${TEST_STDERR}" - diff ${TEST_STDOUT} ${TEST_REF} - NUM_FAILURES=$(( NUM_FAILURES + 1 )) - fi + # Loop through each test suite + for TEST_SUITE_FILE in "${TEST_SUITES[@]}"; do + parse_test_suite "$TEST_SUITE_FILE" + + loginfo "Setting up for test suite: 
${TEST_SHORTNAME} with table format: ${TABLE_FORMAT}" + + # clean up the default configuration if exists + if [ -d "${SPARK_HOME}" ]; then + SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf" + if [ -f "${SPARK_CONF}" ]; then + echo "Clean spark conf file" + rm ${SPARK_CONF} + fi + fi + + echo "finish SPARK_HOME check" + + # Run setup with appropriate table format + if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then + # run the setup without jar configuration + source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --tableFormat ${TABLE_FORMAT} + else + source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH} --tableFormat ${TABLE_FORMAT} + fi + + # run the test + loginfo "Starting test ${TEST_SHORTNAME}" + + TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}_${SPARK_SHELL_OPTION}_${TABLE_FORMAT}" + TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr" + TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout" + + mkdir -p ${TEST_TMPDIR} + if (( ${VERBOSE} )); then + ${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT} + else + ${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT} + fi + loginfo "Test run concluded for ${TEST_SHORTNAME}" + + # Compare output with reference + TEST_REF="${SUITES_DIR}/${TEST_SHORTNAME}.ref" + if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then + loggreen "Test SUCCEEDED: ${TEST_SHORTNAME}" + else + logred "Test FAILED: ${TEST_SHORTNAME}" + echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh + echo "meld ${TEST_STDOUT} ${TEST_REF}" >> ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh + chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh + logred "To compare and fix diffs (if 'meld' installed): ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh" + logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}" + logred "See stderr from test run for additional diagnostics: ${TEST_STDERR}" + diff ${TEST_STDOUT} ${TEST_REF} + NUM_FAILURES=$(( NUM_FAILURES + 1 )) + fi + done done - # clean up if [ "${SPARK_EXISTS}" = "FALSE" ]; then rm -rf ${SPARK_HOME} export SPARK_HOME="" diff --git a/plugins/spark/v3.5/regtests/setup.sh b/plugins/spark/v3.5/regtests/setup.sh index 1a23d3b5ac..50b8ff2dd8 100755 --- a/plugins/spark/v3.5/regtests/setup.sh +++ b/plugins/spark/v3.5/regtests/setup.sh @@ -25,12 +25,15 @@ # Warning - it will set the SPARK_HOME environment variable with the spark setup # # The script can be called independently like following -# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH} +# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH} --tableFormat ${TABLE_FORMAT} # Required Parameters: # --sparkVersion : the spark version to setup # --scalaVersion : the scala version of spark to setup # --jar : path to the local Polaris Spark client jar # +# Optional Parameters: +# --tableFormat : table format to configure (delta|hudi). 
Default: delta +# set -x @@ -40,6 +43,7 @@ SPARK_VERSION=3.5.6 SCALA_VERSION=2.12 POLARIS_CLIENT_JAR="" POLARIS_VERSION="" +TABLE_FORMAT="delta" while [[ $# -gt 0 ]]; do case "$1" in --sparkVersion) @@ -62,13 +66,24 @@ while [[ $# -gt 0 ]]; do shift # past argument shift # past value ;; + --tableFormat) + TABLE_FORMAT="$2" + shift # past argument + shift # past value + ;; --) shift; break ;; esac done -echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION} POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}" +echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION} POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR} TABLE_FORMAT=${TABLE_FORMAT}" + +# Validate table format +if [[ "$TABLE_FORMAT" != "delta" && "$TABLE_FORMAT" != "hudi" ]]; then + echo "Error: Invalid table format '${TABLE_FORMAT}'. Must be 'delta' or 'hudi'." + exit 1 +fi if [ "$SCALA_VERSION" == "2.12" ]; then SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3 @@ -141,14 +156,32 @@ else if [[ -z "$POLARIS_CLIENT_JAR" ]]; then cat << EOF >> ${SPARK_CONF} # POLARIS Spark client test conf +EOF + if [[ "$TABLE_FORMAT" == "hudi" ]]; then + cat << EOF >> ${SPARK_CONF} +spark.jars.packages org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,org.apache.hudi:hudi-spark3.5-bundle_${SCALA_VERSION}:1.1.1 +# Note: Hudi package is passed via --packages on command line in spark_sql_hudi.sh +# to ensure it's resolved before Kryo initialization +EOF + else + cat << EOF >> ${SPARK_CONF} spark.jars.packages org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,io.delta:delta-spark_${SCALA_VERSION}:3.2.1 EOF + fi else cat << EOF >> ${SPARK_CONF} # POLARIS Spark client test conf spark.jars $POLARIS_CLIENT_JAR +EOF + if [[ "$TABLE_FORMAT" == "hudi" ]]; then + cat << EOF >> ${SPARK_CONF} +spark.jars.packages org.apache.hudi:hudi-spark3.5-bundle_${SCALA_VERSION}:1.1.1 +EOF + else + cat << EOF >> ${SPARK_CONF} spark.jars.packages io.delta:delta-spark_${SCALA_VERSION}:3.2.1 EOF + fi fi cat << EOF >> ${SPARK_CONF} @@ -157,9 +190,26 @@ spark.sql.variable.substitute true spark.driver.extraJavaOptions -Dderby.system.home=${DERBY_HOME} +EOF + +if [[ "$TABLE_FORMAT" == "hudi" ]]; then + cat << EOF >> ${SPARK_CONF} +spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.apache.spark.sql.hudi.HoodieSparkSessionExtension +# this configuration is needed for hudi table +spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog +spark.serializer=org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar +hoodie.metadata.enable=false +EOF +else + cat << EOF >> ${SPARK_CONF} spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension # this configuration is needed for delta table spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog +EOF +fi + +cat << EOF >> ${SPARK_CONF} spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog spark.sql.catalog.polaris.uri=http://${POLARIS_HOST:-localhost}:8181/api/catalog # this configuration is currently only used for iceberg tables, generic tables currently diff --git a/plugins/spark/v3.5/regtests/spark_sql.ref b/plugins/spark/v3.5/regtests/suites/spark_sql_delta.ref similarity index 100% rename from plugins/spark/v3.5/regtests/spark_sql.ref rename to 
plugins/spark/v3.5/regtests/suites/spark_sql_delta.ref diff --git a/plugins/spark/v3.5/regtests/spark_sql.sh b/plugins/spark/v3.5/regtests/suites/spark_sql_delta.sh similarity index 100% rename from plugins/spark/v3.5/regtests/spark_sql.sh rename to plugins/spark/v3.5/regtests/suites/spark_sql_delta.sh diff --git a/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref new file mode 100644 index 0000000000..a40b4ddac9 --- /dev/null +++ b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref @@ -0,0 +1,45 @@ +{"defaults":{"default-base-location":"file:///tmp/spark_hudi_catalog"},"overrides":{"prefix":"spark_hudi_catalog"},"endpoints":["GET /v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","HEAD /v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST /v1/{prefix}/namespaces/{namespace}/properties","DELETE /v1/{prefix}/namespaces/{namespace}","GET /v1/{prefix}/namespaces/{namespace}/tables","GET /v1/{prefix}/namespaces/{namespace}/tables/{table}","HEAD /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/namespaces/{namespace}/tables","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}","DELETE /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/tables/rename","POST /v1/{prefix}/namespaces/{namespace}/register","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics","POST /v1/{prefix}/transactions/commit","GET /v1/{prefix}/namespaces/{namespace}/views","GET /v1/{prefix}/namespaces/{namespace}/views/{view}","HEAD /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/namespaces/{namespace}/views","POST /v1/{prefix}/namespaces/{namespace}/views/{view}","DELETE /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/views/rename","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","POST polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","DELETE polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","DELETE /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","GET /polaris/v1/{prefix}/applicable-policies"]} +Catalog created +spark-sql (default)> use polaris; +spark-sql ()> create namespace hudi_db1; +spark-sql ()> create namespace hudi_db2; +spark-sql ()> show namespaces; +hudi_db1 +hudi_db2 +spark-sql ()> + > create namespace hudi_db1.schema1; +spark-sql ()> show namespaces in hudi_db1; +hudi_db1.schema1 +spark-sql ()> + > create table hudi_db1.schema1.hudi_tb1 (id int, name string) using hudi location 'file:///tmp/spark_hudi_catalog/hudi_tb1'; +spark-sql ()> show tables in hudi_db1; +spark-sql ()> show tables in hudi_db1.schema1; +spark-sql ()> + > use hudi_db1.schema1; +spark-sql (hudi_db1.schema1)> insert into hudi_tb1 values (1, 'alice'), (2, 'bob'); +spark-sql (hudi_db1.schema1)> select * from hudi_tb1 order by id; +spark-sql (hudi_db1.schema1)> + > create table hudi_tb2 (name string, age int, country string) using hudi partitioned by (country) location 
'file:///tmp/spark_hudi_catalog/hudi_tb2'; +spark-sql (hudi_db1.schema1)> insert into hudi_tb2 values ('anna', 10, 'US'), ('james', 32, 'US'), ('yan', 16, 'CHINA'); +spark-sql (hudi_db1.schema1)> select name, country from hudi_tb2 order by age; +spark-sql (hudi_db1.schema1)> + > show tables; +spark-sql (hudi_db1.schema1)> + > use hudi_db1; +spark-sql (hudi_db1)> create table iceberg_tb (col1 int); +spark-sql (hudi_db1)> insert into iceberg_tb values (100), (200); +spark-sql (hudi_db1)> select * from iceberg_tb order by col1; +100 +200 +spark-sql (hudi_db1)> + > show tables; +iceberg_tb +spark-sql (hudi_db1)> show tables in hudi_db1.schema1; +spark-sql (hudi_db1)> + > drop table hudi_db1.schema1.hudi_tb1; +spark-sql (hudi_db1)> drop table hudi_db1.schema1.hudi_tb2; +spark-sql (hudi_db1)> drop namespace hudi_db1.schema1; +spark-sql (hudi_db1)> drop table iceberg_tb; +spark-sql (hudi_db1)> drop namespace hudi_db1; +spark-sql (hudi_db1)> drop namespace hudi_db2; +spark-sql (hudi_db1)> diff --git a/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.sh b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.sh new file mode 100755 index 0000000000..0a230a4d0f --- /dev/null +++ b/plugins/spark/v3.5/regtests/suites/spark_sql_hudi.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}" + +# Determine Scala version (default to 2.12 if not set) +SCALA_VERSION="${SCALA_VERSION:-2.12}" + +CATALOG_NAME="spark_hudi_catalog" +curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ + -d '{"name": "spark_hudi_catalog", "id": 200, "type": "INTERNAL", "readOnly": false, "properties": {"default-base-location": "file:///tmp/spark_hudi_catalog"}, "storageConfigInfo": {"storageType": "FILE", "allowedLocations": ["file:///tmp"]}}' > /dev/stderr + +# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME}/catalog-roles/catalog_admin/grants \ + -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr + +curl -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/config?warehouse=${CATALOG_NAME}" +echo +echo "Catalog created" +cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=${CATALOG_NAME} +use polaris; +create namespace hudi_db1; +create namespace hudi_db2; +show namespaces; + +create namespace hudi_db1.schema1; +show namespaces in hudi_db1; + +create table hudi_db1.schema1.hudi_tb1 (id int, name string) using hudi location 'file:///tmp/spark_hudi_catalog/hudi_tb1'; +show tables in hudi_db1; +show tables in hudi_db1.schema1; + +use hudi_db1.schema1; +insert into hudi_tb1 values (1, 'alice'), (2, 'bob'); +select * from hudi_tb1 order by id; + +create table hudi_tb2 (name string, age int, country string) using hudi partitioned by (country) location 'file:///tmp/spark_hudi_catalog/hudi_tb2'; +insert into hudi_tb2 values ('anna', 10, 'US'), ('james', 32, 'US'), ('yan', 16, 'CHINA'); +select name, country from hudi_tb2 order by age; + +show tables; + +use hudi_db1; +create table iceberg_tb (col1 int); +insert into iceberg_tb values (100), (200); +select * from iceberg_tb order by col1; + +show tables; +show tables in hudi_db1.schema1; + +drop table hudi_db1.schema1.hudi_tb1; +drop table hudi_db1.schema1.hudi_tb2; +drop namespace hudi_db1.schema1; +drop table iceberg_tb; +drop namespace hudi_db1; +drop namespace hudi_db2; +EOF + +# clean up the spark_hudi_catalog dir +rm -rf /tmp/spark_hudi_catalog/ + +curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME} > /dev/stderr +
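
For reference, the "Adding a New Test Suite" steps above can be paired with a concrete skeleton. The sketch below is hypothetical: the suite name `spark_sql_smoke_delta.sh`, the catalog name `spark_smoke_catalog`, and the namespace/table names are illustrative only and not part of this change; the curl calls and the `spark-sql` invocation are a trimmed-down copy of what `spark_sql_hudi.sh` already does. A matching `suites/spark_sql_smoke_delta.ref` capturing the expected stdout would be needed alongside it.

```bash
#!/bin/bash
# Hypothetical suites/spark_sql_smoke_delta.sh -- minimal skeleton for a new suite.
# The trailing "_delta" is what run.sh parses into TABLE_FORMAT.

SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}"
CATALOG_NAME="spark_smoke_catalog"  # illustrative catalog name

# Create a dedicated catalog for this suite (same management API call as the existing suites)
curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
  -d '{"name": "spark_smoke_catalog", "type": "INTERNAL", "readOnly": false, "properties": {"default-base-location": "file:///tmp/spark_smoke_catalog"}, "storageConfigInfo": {"storageType": "FILE", "allowedLocations": ["file:///tmp"]}}' > /dev/stderr

# Grant TABLE_WRITE_DATA to catalog_admin so the suite can write table data
curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME}/catalog-roles/catalog_admin/grants \
  -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr

echo "Catalog created"

# Everything spark-sql prints below is what the .ref file must match
cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=${CATALOG_NAME}
use polaris;
create namespace smoke_db;
create table smoke_db.smoke_tb (id int);
insert into smoke_db.smoke_tb values (1), (2);
select * from smoke_db.smoke_tb order by id;
drop table smoke_db.smoke_tb;
drop namespace smoke_db;
EOF

# Clean up local table data and the catalog itself
rm -rf /tmp/spark_smoke_catalog/

curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME} > /dev/stderr
```

Because the file name ends in `_delta`, `run.sh` would invoke `setup.sh` with `--tableFormat delta` before executing this suite.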