feat: initial hudi reg test #3641
@@ -70,6 +70,52 @@ SPARK_VERSION="3.5.6"

```bash
SPARK_SHELL_OPTIONS=("PACKAGE" "JAR")

# Auto-discover test suites from the suites/ directory
# Test files must follow naming convention: <name>_<table_format>.sh
SUITES_DIR="${SCRIPT_DIR}/suites"

if [[ ! -d "$SUITES_DIR" ]]; then
  logred "Error: Test suites directory not found: ${SUITES_DIR}"
  exit 1
fi

# Parses a test suite filename (e.g. "spark_sql_delta.sh") to extract:
#   TABLE_FORMAT   - the table format suffix after the last '_' (e.g. "delta")
#   TEST_SHORTNAME - the base name without the .sh extension (e.g. "spark_sql_delta")
#   TEST_FILE      - the full path to the suite file under SUITES_DIR
parse_test_suite() {
  local filename="$1"
  local base="${filename%.sh}"
  TABLE_FORMAT="${base##*_}"
  TEST_SHORTNAME="${base}"
  TEST_FILE="${SUITES_DIR}/${filename}"
}

declare -a TEST_SUITES=()
for test_file in "${SUITES_DIR}"/*.sh; do
  [[ -f "$test_file" ]] || continue
  TEST_SUITES+=("$(basename "$test_file")")
done

if [[ ${#TEST_SUITES[@]} -eq 0 ]]; then
  logred "Error: No test suites found in ${SUITES_DIR}"
  exit 1
fi

# Allow running specific test via environment variable
```
Contributor:
I think we can potentially also allow running all suites for a particular format by taking the table format as an argument to this script. We can probably do that in a separate PR as an improvement.

Contributor (Author):
Let's do it in another PR.
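A minimal sketch of what that follow-up could look like (the `--format` flag and the filtering logic below are hypothetical, not part of this PR). It simply reuses the existing `<name>_<table_format>.sh` naming convention to keep only the matching suites:

```bash
# Hypothetical follow-up (not in this PR): filter discovered suites by table format.
# Usage sketch: ./<runner-script>.sh --format hudi
if [[ "${1:-}" == "--format" && -n "${2:-}" ]]; then
  FORMAT_FILTER="$2"
  declare -a FILTERED=()
  for suite in "${TEST_SUITES[@]}"; do
    # Keep suites whose name ends in _<format>.sh, e.g. spark_sql_hudi.sh
    [[ "${suite%.sh}" == *"_${FORMAT_FILTER}" ]] && FILTERED+=("$suite")
  done
  TEST_SUITES=("${FILTERED[@]}")
fi
```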
```bash
echo "REGTEST_SUITE=${REGTEST_SUITE}"
if [[ -n "$REGTEST_SUITE" ]]; then
  REGTEST_SUITE="${REGTEST_SUITE%.sh}"
  SUITE_FILE="${REGTEST_SUITE}.sh"
  if [[ ! -f "${SUITES_DIR}/${SUITE_FILE}" ]]; then
    logred "Error: Test suite not found: ${SUITES_DIR}/${SUITE_FILE}"
    exit 1
  fi
  echo "Overriding TEST_SUITES to run only: ${REGTEST_SUITE}"
  TEST_SUITES=("${SUITE_FILE}")
fi
echo "Will run test suites: ${TEST_SUITES[@]}"

for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
  echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}, SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}"
  # find the project jar
```
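Given that override, running a single suite looks roughly like this (the runner script's filename here is assumed for illustration):

```bash
# Run only the hudi suite; the script strips an optional .sh suffix
# before resolving the file under suites/, so both forms are equivalent.
REGTEST_SUITE=spark_sql_hudi ./run_spark_sql_regtests.sh
REGTEST_SUITE=spark_sql_hudi.sh ./run_spark_sql_regtests.sh
```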
@@ -89,55 +135,64 @@ for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do

```bash
  fi

  for SPARK_SHELL_OPTION in "${SPARK_SHELL_OPTIONS[@]}"; do
    # clean up the default configuration if exists
    if [ -f "${SPARK_HOME}" ]; then
      SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
      if [ -f ${SPARK_CONF} ]; then
        rm ${SPARK_CONF}
      fi
    fi

    if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
      # run the setup without jar configuration
      source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION}
    else
      source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH}
    fi

    # run the spark_sql test
    loginfo "Starting test spark_sql.sh"

    TEST_FILE="spark_sql.sh"
    TEST_SHORTNAME="spark_sql"
    TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}"
    TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
    TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"

    mkdir -p ${TEST_TMPDIR}
    if (( ${VERBOSE} )); then
      ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT}
    else
      ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT}
    fi
    loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}"

    TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref"
    if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
      loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}"
    else
      logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}"
      echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
      echo "meld ${TEST_STDOUT} ${TEST_REF}" >> ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
      chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
      logred "To compare and fix diffs (if 'meld' installed): ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
      logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
      logred "See stderr from test run for additional diagnostics: ${TEST_STDERR}"
      diff ${TEST_STDOUT} ${TEST_REF}
      NUM_FAILURES=$(( NUM_FAILURES + 1 ))
    fi
    # Loop through each test suite
    for TEST_SUITE_FILE in "${TEST_SUITES[@]}"; do
      parse_test_suite "$TEST_SUITE_FILE"

      loginfo "Setting up for test suite: ${TEST_SHORTNAME} with table format: ${TABLE_FORMAT}"

      # clean up the default configuration if exists
      if [ -d "${SPARK_HOME}" ]; then
        SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
        if [ -f "${SPARK_CONF}" ]; then
          echo "Clean spark conf file"
          rm ${SPARK_CONF}
        fi
      fi

      echo "finish SPARK_HOME check"

      # Run setup with appropriate table format
      if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
        # run the setup without jar configuration
        source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --tableFormat ${TABLE_FORMAT}
      else
        source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH} --tableFormat ${TABLE_FORMAT}
      fi

      # run the test
      loginfo "Starting test ${TEST_SHORTNAME}"

      TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}_${SPARK_SHELL_OPTION}_${TABLE_FORMAT}"
      TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
      TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"

      mkdir -p ${TEST_TMPDIR}
      if (( ${VERBOSE} )); then
        ${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT}
      else
        ${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT}
      fi
      loginfo "Test run concluded for ${TEST_SHORTNAME}"

      # Compare output with reference
      TEST_REF="${SUITES_DIR}/${TEST_SHORTNAME}.ref"
      if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
        loggreen "Test SUCCEEDED: ${TEST_SHORTNAME}"
      else
        logred "Test FAILED: ${TEST_SHORTNAME}"
        echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
        echo "meld ${TEST_STDOUT} ${TEST_REF}" >> ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
        chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
        logred "To compare and fix diffs (if 'meld' installed): ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
        logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
        logred "See stderr from test run for additional diagnostics: ${TEST_STDERR}"
        diff ${TEST_STDOUT} ${TEST_REF}
        NUM_FAILURES=$(( NUM_FAILURES + 1 ))
      fi
    done
  done

# clean up
if [ "${SPARK_EXISTS}" = "FALSE" ]; then
  rm -rf ${SPARK_HOME}
  export SPARK_HOME=""
```
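When a suite fails, the generated fixdiffs helper is just a two-line wrapper around meld. For a hudi failure it would look something like this (all concrete versions and paths below are illustrative, assembled from the TEST_TMPDIR and TEST_REF patterns above):

```bash
#!/bin/bash
# Illustrative only: assumes SPARK_MAJOR_VERSION=3.5, SCALA_VERSION=2.12,
# SPARK_SHELL_OPTION=PACKAGE, TABLE_FORMAT=hudi, and a checkout at /path/to.
meld /tmp/polaris-spark-regtests/spark_sql_hudi_3.5_2.12_PACKAGE_hudi/spark_sql_hudi.stdout /path/to/regtests/suites/spark_sql_hudi.ref
```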
@@ -0,0 +1,45 @@
| {"defaults":{"default-base-location":"file:///tmp/spark_hudi_catalog"},"overrides":{"prefix":"spark_hudi_catalog"},"endpoints":["GET /v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","HEAD /v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST /v1/{prefix}/namespaces/{namespace}/properties","DELETE /v1/{prefix}/namespaces/{namespace}","GET /v1/{prefix}/namespaces/{namespace}/tables","GET /v1/{prefix}/namespaces/{namespace}/tables/{table}","HEAD /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/namespaces/{namespace}/tables","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}","DELETE /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/tables/rename","POST /v1/{prefix}/namespaces/{namespace}/register","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics","POST /v1/{prefix}/transactions/commit","GET /v1/{prefix}/namespaces/{namespace}/views","GET /v1/{prefix}/namespaces/{namespace}/views/{view}","HEAD /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/namespaces/{namespace}/views","POST /v1/{prefix}/namespaces/{namespace}/views/{view}","DELETE /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/views/rename","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","POST polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","DELETE polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","DELETE /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","GET /polaris/v1/{prefix}/applicable-policies"]} | ||
| Catalog created | ||
| spark-sql (default)> use polaris; | ||
| spark-sql ()> create namespace hudi_db1; | ||
| spark-sql ()> create namespace hudi_db2; | ||
| spark-sql ()> show namespaces; | ||
| hudi_db1 | ||
| hudi_db2 | ||
| spark-sql ()> | ||
| > create namespace hudi_db1.schema1; | ||
| spark-sql ()> show namespaces in hudi_db1; | ||
| hudi_db1.schema1 | ||
| spark-sql ()> | ||
| > create table hudi_db1.schema1.hudi_tb1 (id int, name string) using hudi location 'file:///tmp/spark_hudi_catalog/hudi_tb1'; | ||
| spark-sql ()> show tables in hudi_db1; | ||
| spark-sql ()> show tables in hudi_db1.schema1; | ||
| spark-sql ()> | ||
| > use hudi_db1.schema1; | ||
| spark-sql (hudi_db1.schema1)> insert into hudi_tb1 values (1, 'alice'), (2, 'bob'); | ||
| spark-sql (hudi_db1.schema1)> select * from hudi_tb1 order by id; | ||
| spark-sql (hudi_db1.schema1)> | ||
| > create table hudi_tb2 (name string, age int, country string) using hudi partitioned by (country) location 'file:///tmp/spark_hudi_catalog/hudi_tb2'; | ||
| spark-sql (hudi_db1.schema1)> insert into hudi_tb2 values ('anna', 10, 'US'), ('james', 32, 'US'), ('yan', 16, 'CHINA'); | ||
| spark-sql (hudi_db1.schema1)> select name, country from hudi_tb2 order by age; | ||
| spark-sql (hudi_db1.schema1)> | ||
| > show tables; | ||
| spark-sql (hudi_db1.schema1)> | ||
| > use hudi_db1; | ||
| spark-sql (hudi_db1)> create table iceberg_tb (col1 int); | ||
| spark-sql (hudi_db1)> insert into iceberg_tb values (100), (200); | ||
| spark-sql (hudi_db1)> select * from iceberg_tb order by col1; | ||
| 100 | ||
| 200 | ||
| spark-sql (hudi_db1)> | ||
| > show tables; | ||
| iceberg_tb | ||
| spark-sql (hudi_db1)> show tables in hudi_db1.schema1; | ||
| spark-sql (hudi_db1)> | ||
| > drop table hudi_db1.schema1.hudi_tb1; | ||
| spark-sql (hudi_db1)> drop table hudi_db1.schema1.hudi_tb2; | ||
| spark-sql (hudi_db1)> drop namespace hudi_db1.schema1; | ||
| spark-sql (hudi_db1)> drop table iceberg_tb; | ||
| spark-sql (hudi_db1)> drop namespace hudi_db1; | ||
| spark-sql (hudi_db1)> drop namespace hudi_db2; | ||
| spark-sql (hudi_db1)> |
Contributor:
Can we add a comment here about what this function is doing? It is trying to extract TABLE_FORMAT, TEST_SHORTNAME, and the full path of TEST_FILE, right?

Contributor (Author):
ack
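As a concrete trace of what that comment now documents, using the hudi suite this PR adds (the filename is inferred from the `<name>_<table_format>.sh` convention):

```bash
parse_test_suite "spark_sql_hudi.sh"
echo "${TABLE_FORMAT}"    # hudi            (suffix after the last '_')
echo "${TEST_SHORTNAME}"  # spark_sql_hudi  (base name without the .sh extension)
echo "${TEST_FILE}"       # prints <SUITES_DIR>/spark_sql_hudi.sh (full path under suites/)
```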