45 changes: 45 additions & 0 deletions plugins/spark/v3.5/regtests/README.md
@@ -84,3 +84,48 @@ Note: the regression tests expect Polaris to run with certain options, e.g. with
storage, default realm `POLARIS` and root credentials `root:secret`; if you run the above command,
this will be the case. If you run Polaris in a different way, make sure that Polaris is configured
appropriately.

## Running Specific Test Suites

By default, `run.sh` auto-discovers and executes all test suites in the `suites/` directory.
To run a specific suite, set the `REGTEST_SUITE` environment variable to the suite name (the `.sh` extension is optional):

```bash
# Run only Delta tests
env POLARIS_HOST=localhost REGTEST_SUITE=spark_sql_delta ./plugins/spark/v3.5/regtests/run.sh

# Run only Hudi tests
env POLARIS_HOST=localhost REGTEST_SUITE=spark_sql_hudi ./plugins/spark/v3.5/regtests/run.sh
```

## Adding a New Test Suite

Test suites are auto-discovered from the `suites/` directory. To add a new test suite (a minimal sketch follows this list):

1. Create `suites/<descriptive_name>_<table_format>.sh` (must be executable)
2. Create `suites/<descriptive_name>_<table_format>.ref` (the expected output)
3. The table format is parsed automatically from the last underscore-separated segment before `.sh`
4. Supported table formats: `delta`, `hudi`
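
Below is a minimal sketch of what a new suite script might look like. It assumes the suite is a standalone Bash script that drives `spark-sql` against the `polaris` catalog and that `run.sh` has already invoked `setup.sh` to write a matching `spark-defaults.conf`; the file name `my_feature_delta.sh`, the namespace, and the SQL statements are illustrative only, so consult the existing scripts under `suites/` for the authoritative pattern.

```bash
#!/bin/bash
# Hypothetical suite: suites/my_feature_delta.sh (table format parsed as "delta").
# run.sh compares this script's stdout against suites/my_feature_delta.ref,
# so keep the output deterministic (stable ordering, no timestamps).

set -euo pipefail

# setup.sh has already configured ${SPARK_HOME}/conf/spark-defaults.conf
# for the requested table format before this suite runs.
"${SPARK_HOME}/bin/spark-sql" -e "
use polaris;
create namespace if not exists my_feature_db;
show namespaces;
drop namespace my_feature_db;
"
```

One way to produce the matching `.ref` file is to run the suite once, review the captured stdout, and save it as the reference.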

## Table Format Support

The regression tests support multiple table formats through the `--tableFormat` parameter in `setup.sh`:

- **Delta** (default): Uses `DeltaCatalog` for `spark_catalog`. Tests both Iceberg and Delta tables.
- **Hudi**: Uses `HoodieCatalog` for `spark_catalog`. Tests both Iceberg and Hudi tables.

Each test suite runs in isolation with its own Spark configuration and catalog setup. Because `spark_catalog`
can be bound to only one catalog implementation at a time, separate test suites are needed for the Delta
and Hudi formats.
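
For reference, these are the mutually exclusive `spark_catalog` bindings that `setup.sh` writes into `spark-defaults.conf` (see the `setup.sh` changes below); only one of them can be active in a given Spark session:

```properties
# --tableFormat delta (default)
spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog

# --tableFormat hudi
spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog
```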

### Manual Setup

You can manually run `setup.sh` with a specific table format:

```bash
# Setup for Delta tables (default)
./plugins/spark/v3.5/regtests/setup.sh --sparkVersion 3.5.6 --scalaVersion 2.12 --polarisVersion 0.1.0 --tableFormat delta

# Setup for Hudi tables
./plugins/spark/v3.5/regtests/setup.sh --sparkVersion 3.5.6 --scalaVersion 2.12 --polarisVersion 0.1.0 --tableFormat hudi
```
149 changes: 102 additions & 47 deletions plugins/spark/v3.5/regtests/run.sh
@@ -70,6 +70,52 @@ SPARK_VERSION="3.5.6"

SPARK_SHELL_OPTIONS=("PACKAGE" "JAR")

# Auto-discover test suites from the suites/ directory
# Test files must follow naming convention: <name>_<table_format>.sh
SUITES_DIR="${SCRIPT_DIR}/suites"

if [[ ! -d "$SUITES_DIR" ]]; then
logred "Error: Test suites directory not found: ${SUITES_DIR}"
exit 1
fi

# Parses a test suite filename (e.g. "spark_sql_delta.sh") to extract:
# TABLE_FORMAT - the table format suffix after the last '_' (e.g. "delta")
# TEST_SHORTNAME - the base name without the .sh extension (e.g. "spark_sql_delta")
# TEST_FILE - the full path to the suite file under SUITES_DIR
parse_test_suite() {
Review comment (Contributor): Can we add a comment here about what this function is doing? It is trying to extract TABLE_FORMAT, TEST_SHORTNAME, and the full path of TEST_FILE, right?

Reply (Author): ack
local filename="$1"
local base="${filename%.sh}"
TABLE_FORMAT="${base##*_}"
TEST_SHORTNAME="${base}"
TEST_FILE="${SUITES_DIR}/${filename}"
}

declare -a TEST_SUITES=()
for test_file in "${SUITES_DIR}"/*.sh; do
[[ -f "$test_file" ]] || continue
TEST_SUITES+=("$(basename "$test_file")")
done

if [[ ${#TEST_SUITES[@]} -eq 0 ]]; then
logred "Error: No test suites found in ${SUITES_DIR}"
exit 1
fi

# Allow running specific test via environment variable
Review comment (Contributor): I think we could also allow running all suites for a particular table format by taking the format as an argument to this script. We can probably do that in a separate PR as an improvement.

Reply (Author): let's do that in another PR

echo "REGTEST_SUITE=${REGTEST_SUITE}"
if [[ -n "$REGTEST_SUITE" ]]; then
REGTEST_SUITE="${REGTEST_SUITE%.sh}"
SUITE_FILE="${REGTEST_SUITE}.sh"
if [[ ! -f "${SUITES_DIR}/${SUITE_FILE}" ]]; then
logred "Error: Test suite not found: ${SUITES_DIR}/${SUITE_FILE}"
exit 1
fi
echo "Overriding TEST_SUITES to run only: ${REGTEST_SUITE}"
TEST_SUITES=("${SUITE_FILE}")
fi
echo "Will run test suites: ${TEST_SUITES[@]}"

for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}, SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}"
# find the project jar
@@ -89,55 +135,64 @@ for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
fi

for SPARK_SHELL_OPTION in "${SPARK_SHELL_OPTIONS[@]}"; do
# clean up the default configuration if exists
if [ -f "${SPARK_HOME}" ]; then
SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
if [ -f ${SPARK_CONF} ]; then
rm ${SPARK_CONF}
fi
fi

if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
# run the setup without jar configuration
source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION}
else
source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH}
fi

# run the spark_sql test
loginfo "Starting test spark_sql.sh"

TEST_FILE="spark_sql.sh"
TEST_SHORTNAME="spark_sql"
TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}"
TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"

mkdir -p ${TEST_TMPDIR}
if (( ${VERBOSE} )); then
${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT}
else
${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT}
fi
loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}"

TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref"
if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}"
else
logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}"
echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
echo "meld ${TEST_STDOUT} ${TEST_REF}" >> ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
logred "To compare and fix diffs (if 'meld' installed): ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
logred "See stderr from test run for additional diagnostics: ${TEST_STDERR}"
diff ${TEST_STDOUT} ${TEST_REF}
NUM_FAILURES=$(( NUM_FAILURES + 1 ))
fi
# Loop through each test suite
for TEST_SUITE_FILE in "${TEST_SUITES[@]}"; do
parse_test_suite "$TEST_SUITE_FILE"

loginfo "Setting up for test suite: ${TEST_SHORTNAME} with table format: ${TABLE_FORMAT}"

# clean up the default configuration if exists
if [ -d "${SPARK_HOME}" ]; then
SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
if [ -f "${SPARK_CONF}" ]; then
echo "Clean spark conf file"
rm ${SPARK_CONF}
fi
fi

echo "finish SPARK_HOME check"

# Run setup with appropriate table format
if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
# run the setup without jar configuration
source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --tableFormat ${TABLE_FORMAT}
else
source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH} --tableFormat ${TABLE_FORMAT}
fi

# run the test
loginfo "Starting test ${TEST_SHORTNAME}"

TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}_${SPARK_SHELL_OPTION}_${TABLE_FORMAT}"
TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"

mkdir -p ${TEST_TMPDIR}
if (( ${VERBOSE} )); then
${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT}
else
${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT}
fi
loginfo "Test run concluded for ${TEST_SHORTNAME}"

# Compare output with reference
TEST_REF="${SUITES_DIR}/${TEST_SHORTNAME}.ref"
if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
loggreen "Test SUCCEEDED: ${TEST_SHORTNAME}"
else
logred "Test FAILED: ${TEST_SHORTNAME}"
echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
echo "meld ${TEST_STDOUT} ${TEST_REF}" >> ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
logred "To compare and fix diffs (if 'meld' installed): ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
logred "See stderr from test run for additional diagnostics: ${TEST_STDERR}"
diff ${TEST_STDOUT} ${TEST_REF}
NUM_FAILURES=$(( NUM_FAILURES + 1 ))
fi
done
done

# clean up
if [ "${SPARK_EXISTS}" = "FALSE" ]; then
rm -rf ${SPARK_HOME}
export SPARK_HOME=""
54 changes: 52 additions & 2 deletions plugins/spark/v3.5/regtests/setup.sh
@@ -25,12 +25,15 @@
# Warning - it will set the SPARK_HOME environment variable with the spark setup
#
# The script can be called independently like following
# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH}
# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH} --tableFormat ${TABLE_FORMAT}
# Required Parameters:
# --sparkVersion : the spark version to setup
# --scalaVersion : the scala version of spark to setup
# --jar : path to the local Polaris Spark client jar
#
# Optional Parameters:
# --tableFormat : table format to configure (delta|hudi). Default: delta
#

set -x

@@ -40,6 +43,7 @@ SPARK_VERSION=3.5.6
SCALA_VERSION=2.12
POLARIS_CLIENT_JAR=""
POLARIS_VERSION=""
TABLE_FORMAT="delta"
while [[ $# -gt 0 ]]; do
case "$1" in
--sparkVersion)
@@ -62,13 +66,24 @@ while [[ $# -gt 0 ]]; do
shift # past argument
shift # past value
;;
--tableFormat)
TABLE_FORMAT="$2"
shift # past argument
shift # past value
;;
--) shift;
break
;;
esac
done

echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION} POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}"
echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION} POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR} TABLE_FORMAT=${TABLE_FORMAT}"

# Validate table format
if [[ "$TABLE_FORMAT" != "delta" && "$TABLE_FORMAT" != "hudi" ]]; then
echo "Error: Invalid table format '${TABLE_FORMAT}'. Must be 'delta' or 'hudi'."
exit 1
fi

if [ "$SCALA_VERSION" == "2.12" ]; then
SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3
@@ -141,14 +156,32 @@ else
if [[ -z "$POLARIS_CLIENT_JAR" ]]; then
cat << EOF >> ${SPARK_CONF}
# POLARIS Spark client test conf
EOF
if [[ "$TABLE_FORMAT" == "hudi" ]]; then
cat << EOF >> ${SPARK_CONF}
spark.jars.packages org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,org.apache.hudi:hudi-spark3.5-bundle_${SCALA_VERSION}:1.1.1
# Note: Hudi package is passed via --packages on command line in spark_sql_hudi.sh
# to ensure it's resolved before Kryo initialization
EOF
else
cat << EOF >> ${SPARK_CONF}
spark.jars.packages org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,io.delta:delta-spark_${SCALA_VERSION}:3.2.1
EOF
fi
else
cat << EOF >> ${SPARK_CONF}
# POLARIS Spark client test conf
spark.jars $POLARIS_CLIENT_JAR
EOF
if [[ "$TABLE_FORMAT" == "hudi" ]]; then
cat << EOF >> ${SPARK_CONF}
spark.jars.packages org.apache.hudi:hudi-spark3.5-bundle_${SCALA_VERSION}:1.1.1
EOF
else
cat << EOF >> ${SPARK_CONF}
spark.jars.packages io.delta:delta-spark_${SCALA_VERSION}:3.2.1
EOF
fi
fi

cat << EOF >> ${SPARK_CONF}
Expand All @@ -157,9 +190,26 @@ spark.sql.variable.substitute true

spark.driver.extraJavaOptions -Dderby.system.home=${DERBY_HOME}

EOF

if [[ "$TABLE_FORMAT" == "hudi" ]]; then
cat << EOF >> ${SPARK_CONF}
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.apache.spark.sql.hudi.HoodieSparkSessionExtension
# this configuration is needed for hudi table
spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog
spark.serializer=org.apache.spark.serializer.KryoSerializer
spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar
hoodie.metadata.enable=false
EOF
else
cat << EOF >> ${SPARK_CONF}
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension
# this configuration is needed for delta table
spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog
EOF
fi

cat << EOF >> ${SPARK_CONF}
spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog
spark.sql.catalog.polaris.uri=http://${POLARIS_HOST:-localhost}:8181/api/catalog
# this configuration is currently only used for iceberg tables, generic tables currently
45 changes: 45 additions & 0 deletions plugins/spark/v3.5/regtests/suites/spark_sql_hudi.ref
@@ -0,0 +1,45 @@
{"defaults":{"default-base-location":"file:///tmp/spark_hudi_catalog"},"overrides":{"prefix":"spark_hudi_catalog"},"endpoints":["GET /v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","HEAD /v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST /v1/{prefix}/namespaces/{namespace}/properties","DELETE /v1/{prefix}/namespaces/{namespace}","GET /v1/{prefix}/namespaces/{namespace}/tables","GET /v1/{prefix}/namespaces/{namespace}/tables/{table}","HEAD /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/namespaces/{namespace}/tables","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}","DELETE /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/tables/rename","POST /v1/{prefix}/namespaces/{namespace}/register","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics","POST /v1/{prefix}/transactions/commit","GET /v1/{prefix}/namespaces/{namespace}/views","GET /v1/{prefix}/namespaces/{namespace}/views/{view}","HEAD /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/namespaces/{namespace}/views","POST /v1/{prefix}/namespaces/{namespace}/views/{view}","DELETE /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/views/rename","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","POST polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","DELETE polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","DELETE /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","GET /polaris/v1/{prefix}/applicable-policies"]}
Catalog created
spark-sql (default)> use polaris;
spark-sql ()> create namespace hudi_db1;
spark-sql ()> create namespace hudi_db2;
spark-sql ()> show namespaces;
hudi_db1
hudi_db2
spark-sql ()>
> create namespace hudi_db1.schema1;
spark-sql ()> show namespaces in hudi_db1;
hudi_db1.schema1
spark-sql ()>
> create table hudi_db1.schema1.hudi_tb1 (id int, name string) using hudi location 'file:///tmp/spark_hudi_catalog/hudi_tb1';
spark-sql ()> show tables in hudi_db1;
spark-sql ()> show tables in hudi_db1.schema1;
spark-sql ()>
> use hudi_db1.schema1;
spark-sql (hudi_db1.schema1)> insert into hudi_tb1 values (1, 'alice'), (2, 'bob');
spark-sql (hudi_db1.schema1)> select * from hudi_tb1 order by id;
spark-sql (hudi_db1.schema1)>
> create table hudi_tb2 (name string, age int, country string) using hudi partitioned by (country) location 'file:///tmp/spark_hudi_catalog/hudi_tb2';
spark-sql (hudi_db1.schema1)> insert into hudi_tb2 values ('anna', 10, 'US'), ('james', 32, 'US'), ('yan', 16, 'CHINA');
spark-sql (hudi_db1.schema1)> select name, country from hudi_tb2 order by age;
spark-sql (hudi_db1.schema1)>
> show tables;
spark-sql (hudi_db1.schema1)>
> use hudi_db1;
spark-sql (hudi_db1)> create table iceberg_tb (col1 int);
spark-sql (hudi_db1)> insert into iceberg_tb values (100), (200);
spark-sql (hudi_db1)> select * from iceberg_tb order by col1;
100
200
spark-sql (hudi_db1)>
> show tables;
iceberg_tb
spark-sql (hudi_db1)> show tables in hudi_db1.schema1;
spark-sql (hudi_db1)>
> drop table hudi_db1.schema1.hudi_tb1;
spark-sql (hudi_db1)> drop table hudi_db1.schema1.hudi_tb2;
spark-sql (hudi_db1)> drop namespace hudi_db1.schema1;
spark-sql (hudi_db1)> drop table iceberg_tb;
spark-sql (hudi_db1)> drop namespace hudi_db1;
spark-sql (hudi_db1)> drop namespace hudi_db2;
spark-sql (hudi_db1)>