From 45df076fbb2b367d002c37b63883975baa53b366 Mon Sep 17 00:00:00 2001
From: Eric Chang
Date: Mon, 2 Dec 2024 06:48:36 +0800
Subject: [PATCH] feat(ci): init

---
 .github/ci-test.yml                             |  70 +++++++++++
 .github/workflows/add-to-project.yml            |  16 +++
 .github/workflows/auto-cherry-pick.yml          |  68 +++++++++++
 .../workflows/backend-integration-test.yml      | 115 ++++++++++++++++
 .github/workflows/build.yml                     | 109 +++++++++++++++
 .github/workflows/cron-integration-test.yml     | 108 ++++++++++++++++
 .github/workflows/docker-image.yml              |  99 +++++++++++++++
 .github/workflows/flink-integration-test.yml    | 104 ++++++++++++++++
 .../workflows/frontend-integration-test.yml     | 107 ++++++++++++++++
 .github/workflows/python-integration-test.yml   |  85 +++++++++++++
 .github/workflows/spark-integration-test.yml    | 109 +++++++++++++++
 tests/runSQLOnPlayground.sh                     |  95 +++++++++++++++
 tests/spark-simple.sql                          |  38 ++++++
 tests/spark-simple.sql.out                      |   4 +
 tests/trino-cross-catalog.sql                   |  22 ++++
 tests/trino-cross-catalog.sql.out               |  42 +++++++
 tests/trino-simple.sql                          |  37 ++++++
 tests/trino-simple.sql.out                      |  22 ++++
 tests/trino-test.sql                            |   1 +
 tests/trino-test.sql.out                        |  10 ++
 tests/union-spark.sql.out                       |  15 +++
 tests/union.sql                                 |   7 ++
 tests/union.sql.out                             |  15 +++
 23 files changed, 1298 insertions(+)
 create mode 100644 .github/ci-test.yml
 create mode 100644 .github/workflows/add-to-project.yml
 create mode 100644 .github/workflows/auto-cherry-pick.yml
 create mode 100644 .github/workflows/backend-integration-test.yml
 create mode 100644 .github/workflows/build.yml
 create mode 100644 .github/workflows/cron-integration-test.yml
 create mode 100644 .github/workflows/docker-image.yml
 create mode 100644 .github/workflows/flink-integration-test.yml
 create mode 100644 .github/workflows/frontend-integration-test.yml
 create mode 100644 .github/workflows/python-integration-test.yml
 create mode 100644 .github/workflows/spark-integration-test.yml
 create mode 100755 tests/runSQLOnPlayground.sh
 create mode 100644 tests/spark-simple.sql
 create mode 100644 tests/spark-simple.sql.out
 create mode 100644 tests/trino-cross-catalog.sql
 create mode 100644 tests/trino-cross-catalog.sql.out
 create mode 100644 tests/trino-simple.sql
 create mode 100644 tests/trino-simple.sql.out
 create mode 100644 tests/trino-test.sql
 create mode 100644 tests/trino-test.sql.out
 create mode 100644 tests/union-spark.sql.out
 create mode 100644 tests/union.sql
 create mode 100644 tests/union.sql.out

diff --git a/.github/ci-test.yml b/.github/ci-test.yml
new file mode 100644
index 0000000..5c3a641
--- /dev/null
+++ b/.github/ci-test.yml
@@ -0,0 +1,70 @@
+name: "[CI]"
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - "**.sh"
+      - "**.xml"
+      - "helm-chart/**"
+jobs:
+  linter:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install xmllint
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libxml2-utils
+
+      - name: Install shfmt
+        run: |
+          curl -sSLo shfmt https://github.com/mvdan/sh/releases/download/v3.8.0/shfmt_v3.8.0_linux_amd64 &&
+          chmod +x shfmt &&
+          sudo mv shfmt /usr/local/bin/shfmt
+
+      - name: Lint Shell Scripts
+        run: |
+          find . -name '*.sh' -print0 | xargs -0 shfmt -d -i 2
+
+      - name: Lint XML Files
+        run: |
+          find . -name '*.xml' -print0 | xargs -0 xmllint --noout
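+          # xmllint --noout parses each file and reports syntax errors without
+          # dumping the parsed tree; a quick local spot check (hypothetical
+          # file path) would be: xmllint --noout helm-chart/some-config.xml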
+
+      - name: Checkout gravitino-playground
+        uses: actions/checkout@v4
+        with:
+          # check out a nested copy so the deploy step below can cd into it
+          path: gravitino-playground
+
+      - uses: KengoTODA/actions-setup-docker-compose@v1
+        with:
+          version: "2.14.2"
+
+      - name: Deploy
+        id: deploy
+        timeout-minutes: 40
+        run: |
+          sudo curl -L https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -o /usr/local/bin/yq
+          sudo chmod +x /usr/local/bin/yq
+          cd ${{ github.workspace }}/gravitino-playground
+          nohup ./playground.sh docker start -y > /tmp/playground.log 2>&1 &
+          # wait until both gravitino and trino are ready to use; the readiness
+          # checks are grouped so the retry limit applies to either service
+          i=0
+          while [[ ( ! $(curl -k http://127.0.0.1:8090) || ! $(curl -k http://127.0.0.1:18080/v1/info) ) && $i -le 300 ]]; do
+            sleep 5
+            i=$(expr $i + 1)
+          done
+          docker ps
+          if [[ $(curl -k http://127.0.0.1:8090) && $(curl -k http://127.0.0.1:18080/v1/info) ]]; then
+            echo "gravitino and trino are ready to use"
+          else
+            echo "gravitino or trino not ready"
+            exit 1
+          fi
+
+      - name: Test sql
+        id: test-sql
+        timeout-minutes: 40
+        run: |
+          cd ${{ github.workspace }}/gravitino-playground/tests
+          bash -x ./runSQLOnPlayground.sh
diff --git a/.github/workflows/add-to-project.yml b/.github/workflows/add-to-project.yml
new file mode 100644
index 0000000..6ac8c75
--- /dev/null
+++ b/.github/workflows/add-to-project.yml
@@ -0,0 +1,16 @@
+name: Add issue to project
+
+on:
+  issues:
+    types:
+      - opened
+
+jobs:
+  add-to-project:
+    name: Add issue to project
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/add-to-project@v0.5.0
+        with:
+          project-url: https://github.com/orgs/datastrato/projects/1
+          github-token: ${{ secrets.ADD_ISSUE_TO_PROJECT }}
diff --git a/.github/workflows/auto-cherry-pick.yml b/.github/workflows/auto-cherry-pick.yml
new file mode 100644
index 0000000..8b919af
--- /dev/null
+++ b/.github/workflows/auto-cherry-pick.yml
@@ -0,0 +1,68 @@
+name: Automatically cherry-pick merged PR to different branches
+
+on:
+  pull_request_target:
+    branches:
+      - main
+    types: ["closed"]
+
+jobs:
+  cherry_pick_branch_0_3:
+    runs-on: ubuntu-latest
+    name: Cherry pick into branch-0.3
+    if: ${{ contains(github.event.pull_request.labels.*.name, 'branch-0.3') && github.event.pull_request.merged == true }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Cherry pick into branch-0.3
+        uses: carloscastrojumo/github-cherry-pick-action@v1.0.9
+        with:
+          token: ${{ secrets.BOT_TOKEN }}
+          branch: branch-0.3
+          labels: |
+            cherry-pick
+          reviewers: |
+            jerryshao
+
+  cherry_pick_branch_0_4:
+    runs-on: ubuntu-latest
+    name: Cherry pick into branch-0.4
+    if: ${{ contains(github.event.pull_request.labels.*.name, 'branch-0.4') && github.event.pull_request.merged == true }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Cherry pick into branch-0.4
+        uses: carloscastrojumo/github-cherry-pick-action@v1.0.9
+        with:
+          token: ${{ secrets.BOT_TOKEN }}
+          branch: branch-0.4
+          labels: |
+            cherry-pick
+          reviewers: |
+            jerryshao
+
+  cherry_pick_branch_0_5:
+    runs-on: ubuntu-latest
+    name: Cherry pick into branch-0.5
+    if: ${{ contains(github.event.pull_request.labels.*.name, 'branch-0.5') && github.event.pull_request.merged == true }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Cherry pick into branch-0.5
+        uses: carloscastrojumo/github-cherry-pick-action@v1.0.9
+        with:
+          token: ${{ secrets.BOT_TOKEN }}
+          branch: branch-0.5
+          labels: |
+            cherry-pick
+          reviewers: |
+            jerryshao
+
+env:
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
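+# The three jobs above differ only in the target branch. A sketch of a job for
+# a hypothetical future branch-0.6 (assumed label and branch name) would
+# repeat the same shape:
+#
+#   cherry_pick_branch_0_6:
+#     if: ${{ contains(github.event.pull_request.labels.*.name, 'branch-0.6') && github.event.pull_request.merged == true }}
+#     steps: checkout with fetch-depth 0, then github-cherry-pick-action with branch: branch-0.6
diff --git 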
a/.github/workflows/backend-integration-test.yml b/.github/workflows/backend-integration-test.yml new file mode 100644 index 0000000..437acbd --- /dev/null +++ b/.github/workflows/backend-integration-test.yml @@ -0,0 +1,115 @@ +name: Backend Integration Test + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the "main" branch + push: + branches: [ "main", "branch-*" ] + pull_request: + branches: [ "main", "branch-*" ] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + changes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + source_changes: + - .github/** + - api/** + - bin/** + - catalogs/** + - clients/client-java/** + - clients/client-java-runtime/** + - clients/filesystem-hadoop3/** + - clients/filesystem-hadoop3-runtime/** + - common/** + - conf/** + - core/** + - dev/** + - gradle/** + - integration-test/** + - integration-test-common/** + - meta/** + - server/** + - server-common/** + - trino-connector/** + - web/** + - docs/open-api/** + - build.gradle.kts + - gradle.properties + - gradlew + - setting.gradle.kts + outputs: + source_changes: ${{ steps.filter.outputs.source_changes }} + + # Integration test for AMD64 architecture + test-amd64-arch: + needs: changes + if: needs.changes.outputs.source_changes == 'true' + runs-on: ubuntu-latest + timeout-minutes: 60 + strategy: + matrix: + architecture: [linux/amd64] + java-version: [ 8, 11, 17 ] + test-mode: [ embedded, deploy ] + backend: [ mysql, h2] + env: + PLATFORM: ${{ matrix.architecture }} + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + cache: 'gradle' + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Check required command + run: | + dev/ci/check_commands.sh + + - name: Package Gravitino + if : ${{ matrix.test-mode == 'deploy' }} + run: | + ./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} + + - name: Free up disk space + run: | + dev/ci/util_free_space.sh + + - name: Backend Integration Test + id: integrationTest + run: > + ./gradlew test -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} -P${{ matrix.backend }} -PskipWebITs -PskipDockerTests=false + -x :web:test -x :clients:client-python:test -x :flink-connector:test -x :spark-connector:test -x :spark-connector:spark-common:test + -x :spark-connector:spark-3.3:test -x :spark-connector:spark-3.4:test -x :spark-connector:spark-3.5:test + -x :spark-connector:spark-runtime-3.3:test -x :spark-connector:spark-runtime-3.4:test -x :spark-connector:spark-runtime-3.5:test + + - name: Upload integrate tests reports + uses: actions/upload-artifact@v3 + if: ${{ (failure() && steps.integrationTest.outcome == 'failure') || contains(github.event.pull_request.labels.*.name, 'upload log') }} + with: + name: integrate-test-reports-${{ matrix.java-version }}-${{ matrix.test-mode }}-${{ matrix.backend }} + path: | + build/reports + integration-test/build/*.log + integration-test/build/*.tar + integration-test/build/trino-ci-container-log/hive/*.* + integration-test/build/trino-ci-container-log/hdfs/*.* + distribution/package/logs/gravitino-server.out + distribution/package/logs/gravitino-server.log + catalogs/**/*.log + catalogs/**/*.tar + distribution/**/*.log diff --git 
a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..cbb9eaf --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,109 @@ +name: build + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the "main" branch + push: + branches: [ "main", "branch-*" ] + pull_request: + branches: [ "main", "branch-*" ] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + changes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + source_changes: + - .github/** + - api/** + - bin/** + - catalogs/** + - clients/client-java/** + - clients/client-java-runtime/** + - clients/filesystem-hadoop3/** + - clients/filesystem-hadoop3-runtime/** + - common/** + - conf/** + - core/** + - dev/** + - gradle/** + - integration-test/** + - meta/** + - server/** + - server-common/** + - spark-connector/** + - flink-connector/** + - trino-connector/** + - web/** + - docs/open-api/** + - build.gradle.kts + - gradle.properties + - gradlew + - setting.gradle.kts + outputs: + source_changes: ${{ steps.filter.outputs.source_changes }} + + compile-check: + runs-on: ubuntu-latest + needs: changes + if: needs.changes.outputs.source_changes != 'true' + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v4 + with: + java-version: 8 + distribution: 'temurin' + cache: 'gradle' + + - name: Build with Gradle + run: ./gradlew build -x test -PjdkVersion=8 + + build: + # The type of runner that the job will run on + runs-on: ubuntu-latest + strategy: + matrix: + java-version: [ 8, 11, 17 ] + timeout-minutes: 30 + needs: changes + if: needs.changes.outputs.source_changes == 'true' + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + cache: 'gradle' + + - name: Test publish to local + run: ./gradlew publishToMavenLocal -x test -PjdkVersion=${{ matrix.java-version }} + + - name: Free up disk space + run: | + dev/ci/util_free_space.sh + + - name: Build with Gradle + run: ./gradlew build -PskipITs -PjdkVersion=${{ matrix.java-version }} -PskipDockerTests=false -x :clients:client-python:build + + - name: Upload unit tests report + uses: actions/upload-artifact@v3 + if: failure() + with: + name: unit test report + path: | + build/reports + catalogs/**/*.log + catalogs/**/*.tar diff --git a/.github/workflows/cron-integration-test.yml b/.github/workflows/cron-integration-test.yml new file mode 100644 index 0000000..195e1b6 --- /dev/null +++ b/.github/workflows/cron-integration-test.yml @@ -0,0 +1,108 @@ +name: Cron Integration Test + +# Controls when the workflow will run +on: + schedule: # Runs by default on main branch + - cron: '0 19 * * *' # Runs every day at 19:00 PM UTC, equal to 03:00 AM the next day in GMT+8 time zone + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ contains(github.ref, 'main') }} + +jobs: + changes: + if: github.repository == 'datastrato/gravitino' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: 
filter + with: + filters: | + source_changes: + - .github/** + - api/** + - bin/** + - catalogs/** + - clients/** + - common/** + - conf/** + - core/** + - dev/** + - gradle/** + - integration-test/** + - meta/** + - server/** + - server-common/** + - spark-connector/** + - trino-connector/** + - web/** + - docs/open-api/** + - build.gradle.kts + - gradle.properties + - gradlew + - setting.gradle.kts + outputs: + source_changes: ${{ steps.filter.outputs.source_changes }} + + # Integration test for AMD64 architecture + test-amd64-arch: + needs: changes + if: needs.changes.outputs.source_changes == 'true' + runs-on: ubuntu-latest + timeout-minutes: 120 + strategy: + matrix: + architecture: [linux/amd64] + java-version: [ 8, 11, 17 ] + test-mode: [ embedded, deploy ] + env: + DOCKER_RUN_NAME: hive-amd64 + PLATFORM: ${{ matrix.architecture }} + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + cache: 'gradle' + + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Package Gravitino + run: | + ./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} + + - name: Free up disk space + run: | + dev/ci/util_free_space.sh + + - name: Integration Test + id: integrationTest + run: | + ./gradlew test -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} -PskipDockerTests=false + + - name: Upload integrate tests reports + uses: actions/upload-artifact@v3 + if: ${{ failure() && steps.integrationTest.outcome == 'failure' }} + with: + name: integrate test reports + path: | + build/reports + integration-test/build/*.log + integration-test/build/*.tar + integration-test/build/trino-ci-container-log/hive/*.* + integration-test/build/trino-ci-container-log/hdfs/*.* + distribution/package/logs/gravitino-server.out + distribution/package/logs/gravitino-server.log + catalogs/**/*.log + catalogs/**/*.tar + distribution/**/*.log + spark-connector/v3.3/spark/build/spark-3.3-integration-test.log + spark-connector/v3.4/spark/build/spark-3.4-integration-test.log + spark-connector/v3.5/spark/build/spark-3.5-integration-test.log + flink-connector/build/flink-connector-integration-test.log + flink-connector/build/*.tar diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 0000000..1993a20 --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,99 @@ +name: Publish Docker Image + +on: + workflow_dispatch: + inputs: + image: + type: choice + description: 'Choose the image to build' + required: true + default: 'gravitino' + options: + - 'gravitino' + - 'gravitino-ci-hive' + - 'gravitino-ci-kerberos-hive' + - 'gravitino-ci-trino' + - 'gravitino-ci-doris' + - 'gravitino-ci-ranger' + - 'trino' + - 'hive' + - 'ranger' + tag: + description: 'Docker tag to apply to this image' + required: true + type: string + token: + description: 'Publish Docker token' + required: true + type: string + +jobs: + publish-docker-image: + runs-on: ubuntu-latest + timeout-minutes: 120 + env: + input_token: ${{ github.event.inputs.token }} + secrets_token: ${{ secrets.PUBLISH_DOCKER_TOKEN }} + steps: + - name: Set environment variables + run: | + if [ "${{ github.event.inputs.image }}" == "gravitino-ci-hive" ]; then + echo "image_type=hive" >> $GITHUB_ENV + echo "image_name=datastrato/gravitino-ci-hive" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-kerberos-hive" ]; then + echo 
"image_type=kerberos-hive" >> $GITHUB_ENV + echo "image_name=datastrato/gravitino-ci-kerberos-hive" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-trino" ]; then + echo "image_type=trino" >> $GITHUB_ENV + echo "image_name=datastrato/gravitino-ci-trino" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-doris" ]; then + echo "image_type=doris" >> $GITHUB_ENV + echo "image_name=datastrato/gravitino-ci-doris" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino-ci-ranger" ]; then + echo "image_type=ranger" >> $GITHUB_ENV + echo "image_name=datastrato/gravitino-ci-ranger" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "gravitino" ]; then + echo "image_type=gravitino" >> $GITHUB_ENV + echo "image_name=datastrato/gravitino" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "trino" ]; then + echo "image_type=trino" >> $GITHUB_ENV + echo "image_name=datastrato/trino" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "hive" ]; then + echo "image_type=hive" >> $GITHUB_ENV + echo "image_name=datastrato/hive" >> $GITHUB_ENV + elif [ "${{ github.event.inputs.image }}" == "ranger" ]; then + echo "image_type=ranger" >> $GITHUB_ENV + echo "image_name=datastrato/ranger" >> $GITHUB_ENV + fi + + - name: Check publish Docker token + run: | + if [[ "${secrets_token}" != "${input_token}" ]]; then + echo "You have entered an incorrect token. Please re-enter it." + exit 1 + fi + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Login to Docker Hub + uses: docker/login-action@v2 + with: + username: datastrato + password: ${{ secrets.DOCKER_REPOSITORY_PASSWORD }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v3 + with: + java-version: '8' + distribution: 'temurin' + + - name: Build and Push the Docker image + run: | + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/hostedtoolcache/CodeQL + ./dev/docker/build-docker.sh --platform all --type ${image_type} --image ${image_name} --tag ${{ github.event.inputs.tag }} --latest diff --git a/.github/workflows/flink-integration-test.yml b/.github/workflows/flink-integration-test.yml new file mode 100644 index 0000000..c59c0fd --- /dev/null +++ b/.github/workflows/flink-integration-test.yml @@ -0,0 +1,104 @@ +name: Flink Integration Test + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the "main" branch + push: + branches: [ "main", "branch-*" ] + pull_request: + branches: [ "main", "branch-*" ] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + changes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + source_changes: + - .github/** + - api/** + - bin/** + - catalogs/** + - clients/client-java/** + - clients/client-java-runtime/** + - clients/filesystem-hadoop3/** + - clients/filesystem-hadoop3-runtime/** + - common/** + - conf/** + - core/** + - dev/** + - gradle/** + - meta/** + - server/** + - server-common/** + - flink-connector/** + - docs/open-api/** + - build.gradle.kts + - gradle.properties + - gradlew + - setting.gradle.kts + outputs: + source_changes: ${{ steps.filter.outputs.source_changes }} + + # Integration test for AMD64 architecture + test-amd64-arch: + needs: changes + if: 
needs.changes.outputs.source_changes == 'true' + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + matrix: + architecture: [linux/amd64] + java-version: [ 8, 11, 17 ] + env: + PLATFORM: ${{ matrix.architecture }} + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + cache: 'gradle' + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Check required command + run: | + dev/ci/check_commands.sh + + - name: Package Gravitino + run: | + ./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} + + - name: Free up disk space + run: | + dev/ci/util_free_space.sh + + - name: Flink Integration Test + id: integrationTest + run: | + ./gradlew -PskipTests -PtestMode=embedded -PjdkVersion=${{ matrix.java-version }} -PskipDockerTests=false :flink-connector:test --tests "org.apache.gravitino.flink.connector.integration.test.**" + ./gradlew -PskipTests -PtestMode=deploy -PjdkVersion=${{ matrix.java-version }} -PskipDockerTests=false :flink-connector:test --tests "org.apache.gravitino.flink.connector.integration.test.**" + + - name: Upload integrate tests reports + uses: actions/upload-artifact@v3 + if: ${{ (failure() && steps.integrationTest.outcome == 'failure') || contains(github.event.pull_request.labels.*.name, 'upload log') }} + with: + name: flink-connector-integrate-test-reports-${{ matrix.java-version }} + path: | + build/reports + flink-connector/build/flink-connector-integration-test.log + flink-connector/build/*.tar + distribution/package/logs/gravitino-server.out + distribution/package/logs/gravitino-server.log + catalogs/**/*.log + catalogs/**/*.tar \ No newline at end of file diff --git a/.github/workflows/frontend-integration-test.yml b/.github/workflows/frontend-integration-test.yml new file mode 100644 index 0000000..2d730f3 --- /dev/null +++ b/.github/workflows/frontend-integration-test.yml @@ -0,0 +1,107 @@ +name: Frontend Integration Test + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the "main" branch + push: + branches: [ "main", "branch-*" ] + pull_request: + branches: [ "main", "branch-*" ] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + changes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + source_changes: + - .github/** + - api/** + - bin/** + - catalogs/** + - clients/client-java/** + - clients/client-java-runtime/** + - clients/filesystem-hadoop3/** + - clients/filesystem-hadoop3-runtime/** + - common/** + - conf/** + - core/** + - dev/** + - gradle/** + - integration-test/** + - meta/** + - server/** + - server-common/** + - spark-connector/** + - trino-connector/** + - web/** + - docs/open-api/** + - build.gradle.kts + - gradle.properties + - gradlew + - setting.gradle.kts + outputs: + source_changes: ${{ steps.filter.outputs.source_changes }} + + # Integration test for AMD64 architecture + test-amd64-arch: + needs: changes + if: needs.changes.outputs.source_changes == 'true' + runs-on: ubuntu-latest + timeout-minutes: 60 + strategy: + matrix: + architecture: [linux/amd64] + java-version: [ 8 ] + env: + PLATFORM: ${{ matrix.architecture }} + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + 
cache: 'gradle' + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Check required command + run: | + dev/ci/check_commands.sh + + - name: Package Gravitino + run: | + ./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} + + - name: Free up disk space + run: | + dev/ci/util_free_space.sh + + - name: Frontend Integration Test + id: integrationTest + run: | + ./gradlew -PskipTests -PtestMode=embedded -PjdkVersion=${{ matrix.java-version }} -PskipDockerTests=false :integration-test:test --tests "org.apache.gravitino.integration.test.web.ui.**" + ./gradlew -PskipTests -PtestMode=deploy -PjdkVersion=${{ matrix.java-version }} -PskipDockerTests=false :integration-test:test --tests "org.apache.gravitino.integration.test.web.ui.**" + + - name: Upload integrate tests reports + uses: actions/upload-artifact@v3 + if: ${{ (failure() && steps.integrationTest.outcome == 'failure') || contains(github.event.pull_request.labels.*.name, 'upload log') }} + with: + name: integrate-test-reports-${{ matrix.java-version }} + path: | + build/reports + integration-test/build/integration-test-integration-test.log + integration-test/build/*.tar + distribution/package/logs/gravitino-server.out + distribution/package/logs/gravitino-server.log + catalogs/**/*.log + catalogs/**/*.tar \ No newline at end of file diff --git a/.github/workflows/python-integration-test.yml b/.github/workflows/python-integration-test.yml new file mode 100644 index 0000000..a7ffacf --- /dev/null +++ b/.github/workflows/python-integration-test.yml @@ -0,0 +1,85 @@ +name: Python Client Integration Test + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the "main" branch + push: + branches: [ "main", "branch-*" ] + pull_request: + branches: [ "main", "branch-*" ] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + changes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + source_changes: + - .github/** + - api/** + - clients/client-python/** + - common/** + - conf/** + - core/** + - meta/** + - server/** + - server-common/** + outputs: + source_changes: ${{ steps.filter.outputs.source_changes }} + + # Integration test for AMD64 architecture + test-amd64-arch: + needs: changes + if: needs.changes.outputs.source_changes == 'true' + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + matrix: + architecture: [linux/amd64] + java-version: [ 8 ] + env: + PLATFORM: ${{ matrix.architecture }} + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + cache: 'gradle' + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Python Client Integration Test + id: integrationTest + run: | + ./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} + + for pythonVersion in "3.8" "3.9" "3.10" "3.11" + do + echo "Use Python version ${pythonVersion} to test the Python client." 
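+          # Each pass runs the client-python tests against the same packaged
+          # distribution; a single version can be exercised on its own with the
+          # same property, e.g.: ./gradlew -PpythonVersion=3.9 :clients:client-python:test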
+ ./gradlew -PjdkVersion=${{ matrix.java-version }} -PpythonVersion=${pythonVersion} -PskipDockerTests=false :clients:client-python:test + # Clean Gravitino database to clean test data + rm -rf ./distribution/package/data + done + + - name: Upload integrate tests reports + uses: actions/upload-artifact@v3 + if: ${{ failure() && steps.integrationTest.outcome == 'failure' }} + with: + name: integrate test reports + path: | + build/reports + integration-test/build/integration-test.log + distribution/package/logs/gravitino-server.out + distribution/package/logs/gravitino-server.log + catalogs/**/*.log + catalogs/**/*.tar \ No newline at end of file diff --git a/.github/workflows/spark-integration-test.yml b/.github/workflows/spark-integration-test.yml new file mode 100644 index 0000000..9f18e20 --- /dev/null +++ b/.github/workflows/spark-integration-test.yml @@ -0,0 +1,109 @@ +name: Spark Integration Test + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the "main" branch + push: + branches: [ "main", "branch-*" ] + pull_request: + branches: [ "main", "branch-*" ] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + changes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: | + source_changes: + - .github/** + - api/** + - bin/** + - catalogs/** + - clients/client-java/** + - clients/client-java-runtime/** + - clients/filesystem-hadoop3/** + - clients/filesystem-hadoop3-runtime/** + - common/** + - conf/** + - core/** + - dev/** + - gradle/** + - meta/** + - server/** + - server-common/** + - spark-connector/** + - docs/open-api/** + - build.gradle.kts + - gradle.properties + - gradlew + - setting.gradle.kts + outputs: + source_changes: ${{ steps.filter.outputs.source_changes }} + + # Integration test for AMD64 architecture + test-amd64-arch: + needs: changes + if: needs.changes.outputs.source_changes == 'true' + runs-on: ubuntu-latest + timeout-minutes: 90 + strategy: + matrix: + architecture: [linux/amd64] + java-version: [ 8, 11, 17 ] + scala-version: [ 2.12 ] + test-mode: [ embedded, deploy ] + env: + PLATFORM: ${{ matrix.architecture }} + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + cache: 'gradle' + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Check required command + run: | + dev/ci/check_commands.sh + + - name: Package Gravitino + if : ${{ matrix.test-mode == 'deploy' }} + run: | + ./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} + + - name: Free up disk space + run: | + dev/ci/util_free_space.sh + + - name: Spark Integration Test + id: integrationTest + run: | + if [ "${{ matrix.scala-version }}" == "2.12" ];then + ./gradlew -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} -PscalaVersion=${{ matrix.scala-version }} -PskipDockerTests=false :spark-connector:spark-3.3:test --tests "org.apache.gravitino.spark.connector.integration.test.**" + fi + ./gradlew -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} -PscalaVersion=${{ matrix.scala-version }} -PskipDockerTests=false :spark-connector:spark-3.4:test --tests "org.apache.gravitino.spark.connector.integration.test.**" + ./gradlew -PskipTests -PtestMode=${{ matrix.test-mode }} 
-PjdkVersion=${{ matrix.java-version }} -PscalaVersion=${{ matrix.scala-version }} -PskipDockerTests=false :spark-connector:spark-3.5:test --tests "org.apache.gravitino.spark.connector.integration.test.**"
+
+      - name: Upload integrate tests reports
+        uses: actions/upload-artifact@v3
+        if: ${{ (failure() && steps.integrationTest.outcome == 'failure') || contains(github.event.pull_request.labels.*.name, 'upload log') }}
+        with:
+          name: spark-connector-integrate-test-reports-${{ matrix.java-version }}-${{ matrix.test-mode }}
+          path: |
+            build/reports
+            spark-connector/v3.3/spark/build/spark-3.3-integration-test.log
+            spark-connector/v3.4/spark/build/spark-3.4-integration-test.log
+            spark-connector/v3.5/spark/build/spark-3.5-integration-test.log
+            distribution/package/logs/gravitino-server.out
+            distribution/package/logs/gravitino-server.log
diff --git a/tests/runSQLOnPlayground.sh b/tests/runSQLOnPlayground.sh
new file mode 100755
index 0000000..e43d737
--- /dev/null
+++ b/tests/runSQLOnPlayground.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# install the trino cli if it is not already on the PATH
+if command -v trino >/dev/null 2>&1; then
+  echo "Trino client installed"
+else
+  wget https://repo1.maven.org/maven2/io/trino/trino-cli/448/trino-cli-448-executable.jar -O /tmp/trino
+  sudo cp /tmp/trino /usr/local/bin/trino
+  sudo chmod +x /usr/local/bin/trino
+  trino --version
+fi
+
+# check trino connection
+i=0
+while [[ ! $(trino --server http://127.0.0.1:18080 -f ./trino-test.sql) && $i -le 200 ]]; do
+  sleep 5
+  i=$(expr $i + 1)
+done
+
+# check that the trino catalogs are loaded
+j=0
+rm -f /tmp/trino-test.sql.log
+trino --server http://127.0.0.1:18080 -f ./trino-test.sql >>/tmp/trino-test.sql.log
+while [[ -n $(diff ./trino-test.sql.out /tmp/trino-test.sql.log) && $j -le 200 ]]; do
+  sleep 5
+  j=$(expr $j + 1)
+  rm -f /tmp/trino-test.sql.log
+  trino --server http://127.0.0.1:18080 -f ./trino-test.sql >>/tmp/trino-test.sql.log
+done
+
+# run sql and check results
+rm -f /tmp/trino-simple.sql.log
+trino --server http://127.0.0.1:18080 -f ./trino-simple.sql >>/tmp/trino-simple.sql.log
+if [[ -z $(diff ./trino-simple.sql.out /tmp/trino-simple.sql.log) ]]; then
+  echo "run trino-simple.sql successfully"
+else
+  echo "run trino-simple.sql failed"
+  exit 1
+fi
+
+i=0
+num=$(trino --server http://127.0.0.1:18080 -f ./trino-cross-catalog.sql | wc -l)
+while [[ ${num} -lt 42 && $i -le 200 ]]; do
+  sleep 5
+  i=$(expr $i + 1)
+  num=$(trino --server http://127.0.0.1:18080 -f ./trino-cross-catalog.sql | wc -l)
+done
+rm -f /tmp/trino-cross-catalog.sql.log
+trino --server http://127.0.0.1:18080 -f ./trino-cross-catalog.sql | sort >>/tmp/trino-cross-catalog.sql.log
+if [[ -z $(diff ./trino-cross-catalog.sql.out /tmp/trino-cross-catalog.sql.log) ]]; then
+  echo "run trino-cross-catalog.sql successfully"
+else
+  echo "run trino-cross-catalog.sql failed"
+  exit 1
+fi
+
+# replace the bundled spark connector jars with the latest build from S3
+for fileName in $(docker exec playground-spark ls /opt/spark/jars/ | grep gravitino-spark-connector); do
+  docker exec playground-spark rm -rf /opt/spark/jars/${fileName}
+done
+aws s3 cp s3://gravitino-spark-connector/3.4_2.12/ /tmp/gravitino-spark-connector/3.4_2.12 --recursive
+docker cp /tmp/gravitino-spark-connector/3.4_2.12/gravitino-spark-connector-*.jar playground-spark:/opt/spark/jars/
+rm -f /tmp/spark-simple.sql.log /tmp/union-spark.sql.log
+docker cp ./union.sql playground-spark:/opt/spark/work-dir/
+docker cp ./spark-simple.sql playground-spark:/opt/spark/work-dir/
+sleep 2
+docker exec playground-spark bash /opt/spark/bin/spark-sql -f spark-simple.sql
+if [[ $? -eq 0 ]]; then
+  echo "run spark-simple.sql successfully"
+else
+  echo "run spark-simple.sql failed"
+  exit 1
+fi
+docker exec playground-spark bash /opt/spark/bin/spark-sql -f union.sql | sort >>/tmp/union-spark.sql.log
+if [[ -z $(diff ./union-spark.sql.out /tmp/union-spark.sql.log) ]]; then
+  echo "run union.sql in spark successfully"
+else
+  echo "run union.sql in spark failed"
+  exit 1
+fi
+
+i=0
+num=$(trino --server http://127.0.0.1:18080 -f ./union.sql | wc -l)
+while [[ ${num} -lt 12 && $i -le 200 ]]; do
+  sleep 5
+  i=$(expr $i + 1)
+  num=$(trino --server http://127.0.0.1:18080 -f ./union.sql | wc -l)
+done
+rm -f /tmp/union.sql.log
+trino --server http://127.0.0.1:18080 -f ./union.sql | sort >>/tmp/union.sql.log
+if [[ -z $(diff ./union.sql.out /tmp/union.sql.log) ]]; then
+  echo "run union.sql in trino successfully"
+else
+  echo "run union.sql in trino failed"
+  exit 1
+fi
\ No newline at end of file
diff --git a/tests/spark-simple.sql b/tests/spark-simple.sql
new file mode 100644
index 0000000..3dade41
--- /dev/null
+++ b/tests/spark-simple.sql
@@ -0,0 +1,38 @@
+USE catalog_hive;
+CREATE DATABASE IF NOT EXISTS product;
+USE product;
+
+CREATE TABLE IF NOT EXISTS employees (
+  id INT,
+  name STRING,
+  age INT
+)
+PARTITIONED BY (department STRING)
+STORED AS PARQUET;
+DESC TABLE EXTENDED employees;
+
+INSERT OVERWRITE TABLE employees PARTITION(department='Engineering') VALUES (1, 'John Doe', 30), (2, 'Jane Smith', 28);
+INSERT OVERWRITE TABLE employees PARTITION(department='Marketing') VALUES (3, 'Mike Brown', 32);
+
+use catalog_rest;
+create database sales;
+use sales;
+create table customers (customer_id int, customer_name varchar(100), customer_email varchar(100));
+describe extended customers;
+insert into customers (customer_id, customer_name, customer_email) values (11,'Rory Brown','rory@123.com');
+insert into customers (customer_id, customer_name, customer_email) values (12,'Jerry Washington','jerry@dt.com');
+
+use catalog_iceberg;
+use mydb;
+create table abc(a int, b int) partitioned by (a) TBLPROPERTIES ('format-version'='2', 'write.merge.mode'='merge-on-read', 'write.delete.mode'='merge-on-read');
+insert into abc values(1,2);
+insert into abc values(2,3);
+insert into abc values(3,4);
+update abc set a = 4 where b = 4;
+delete from abc where a = 1;
+merge into abc USING (select 2 as a,20 as b) as t on abc.a = t.a when matched then update set * when not matched then insert *;
+merge into abc USING (select 8 as a,8 as b) as t on abc.a = t.a when matched then update set * when not matched then insert *;
+
+select * from catalog_iceberg.mydb.example_table;
+
+select * from catalog_hive.product.page_views where country = 'USA';
\ No newline at end of file
diff --git a/tests/spark-simple.sql.out b/tests/spark-simple.sql.out
new file mode 100644
index 0000000..44b55fa
--- /dev/null
+++ b/tests/spark-simple.sql.out
@@ -0,0 +1,4 @@
+1 2021-01-01 10.5
+2 2021-01-02 20.5
+3 2021-01-03 30.75
+2023-12-01 08:15:30 123457 http://example.com/about 2023-12-01 USA
diff --git a/tests/trino-cross-catalog.sql b/tests/trino-cross-catalog.sql
new file mode 100644
index 0000000..30f7c31
--- /dev/null
+++ b/tests/trino-cross-catalog.sql
@@ -0,0 +1,22 @@
+SELECT given_name, family_name, job_title, sum(total_amount) AS total_sales
+FROM gt_hive2.sales.sales as s,
+     catalog_postgres.hr.employees AS e
+where s.employee_id = e.employee_id
+GROUP BY given_name, family_name, job_title
+ORDER BY total_sales DESC
+LIMIT 1;
+
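+-- Per-customer spend by store location, joining three tables from the same
+-- hive catalog (the sales, stores, and customers tables are assumed to be
+-- pre-loaded by the playground's init data).
+SELECT customer_name, location, SUM(total_amount) AS total_spent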
+FROM gt_hive2.sales.sales AS s, + gt_hive2.sales.stores AS l, + gt_hive2.sales.customers AS c +WHERE s.store_id = l.store_id AND s.customer_id = c.customer_id +GROUP BY location, customer_name +ORDER BY location, SUM(total_amount) DESC; + +SELECT e.employee_id, given_name, family_name, AVG(rating) AS average_rating, SUM(total_amount) AS total_sales +FROM catalog_postgres.hr.employees AS e, + catalog_postgres.hr.employee_performance AS p, + gt_hive2.sales.sales AS s +WHERE e.employee_id = p.employee_id AND p.employee_id = s.employee_id +GROUP BY e.employee_id, given_name, family_name; \ No newline at end of file diff --git a/tests/trino-cross-catalog.sql.out b/tests/trino-cross-catalog.sql.out new file mode 100644 index 0000000..0dbcd79 --- /dev/null +++ b/tests/trino-cross-catalog.sql.out @@ -0,0 +1,42 @@ +"10","Chelsea","Wade","5.0","299.97" +"11","Clarke","Sanders","5.833333333333333","3329.34" +"13","Risa","Barber","7.0","1779.72" +"15","Oprah","Noel","5.0","1959.82" +"18","Carolyn","Bradshaw","7.0","1029.90" +"19","Xyla","Le","4.0","271.92" +"21","Carol","Decker","5.666666666666667","2924.16" +"22","Quemby","Lucas","5.0","897.47" +"23","Phoebe","Forbes","4.25","1919.76" +"25","Elijah","Burnett","3.8","249.95" +"28","Reuben","Rojas","4.25","3799.68" +"29","Maxwell","Patel","7.0","2759.88" +"30","Edward","Reed","5.333333333333333","168.00" +"32","Jesse","Contreras","5.0","1049.76" +"41","Dale","Lindsey","7.0","1809.87" +"42","Maite","Riddle","5.0","485.43" +"44","Perry","Roberson","7.5","49.98" +"46","Oleg","Tran","10.0","1739.91" +"48","Mohammad","Caldwell","6.0","256.47" +"49","Zephr","Long","6.0","179.98" +"52","Laura","Macdonald","7.166666666666667","3839.76" +"6","Jasper","Mack","4.0","2207.64" +"Dale","Lindsey","Sales Assistant","1809.87" +"Erasmus Phelps","Nebraska","1553.79" +"Erasmus Phelps","Texas","169.99" +"Erasmus Phelps","Vermont","199.98" +"Harriet Best","Kansas","4229.70" +"Harriet Best","Nebraska","1039.81" +"Harriet Best","Texas","256.47" +"Harriet Best","Vermont","719.88" +"Lenore Wilder","Kansas","1594.84" +"Lenore Wilder","Nebraska","129.98" +"Mia Hahn","Kansas","94.97" +"Mia Hahn","Nebraska","2479.82" +"Mia Hahn","Texas","279.98" +"Mia Hahn","Vermont","479.88" +"Perry Tyler","Kansas","99.99" +"Perry Tyler","Nebraska","1379.94" +"Perry Tyler","Vermont","299.98" +"Raya Mcguire","Kansas","349.92" +"Raya Mcguire","Nebraska","1674.46" +"Raya Mcguire","Texas","135.98" diff --git a/tests/trino-simple.sql b/tests/trino-simple.sql new file mode 100644 index 0000000..1a148e0 --- /dev/null +++ b/tests/trino-simple.sql @@ -0,0 +1,37 @@ +SHOW CATALOGS; + +CREATE SCHEMA catalog_hive.company + WITH (location = 'hdfs://hive:9000/user/hive/warehouse/company.db'); + +SHOW CREATE SCHEMA catalog_hive.company; + +CREATE TABLE catalog_hive.company.employees +( + name varchar, + salary decimal(10,2) +) +WITH ( + format = 'TEXTFILE' +); + +INSERT INTO catalog_hive.company.employees (name, salary) VALUES ('Sam Evans', 55000); + +SELECT * FROM catalog_hive.company.employees; + +SHOW SCHEMAS from catalog_hive; + +DESCRIBE catalog_hive.company.employees; + +SHOW TABLES from catalog_hive.company; + +CREATE SCHEMA catalog_iceberg.mydb; + +USE catalog_iceberg.mydb; +CREATE TABLE example_table ( c1 INTEGER, c2 DATE, c3 DOUBLE)WITH ( partitioning = ARRAY['c1', 'c2'], sorted_by = ARRAY['c3'], location = 'hdfs://hive:9000/example_table'); +INSERT INTO example_table (c1, c2, c3)VALUES (1, DATE '2021-01-01', 10.5), (2, DATE '2021-01-02', 20.5), (3, DATE '2021-01-03', 30.75); + +CREATE SCHEMA 
catalog_hive.product; + +USE catalog_hive.product; +CREATE TABLE page_views ( view_time TIMESTAMP, user_id BIGINT, page_url VARCHAR, ds DATE, country VARCHAR)WITH ( format = 'ORC', partitioned_by = ARRAY['ds', 'country'], bucketed_by = ARRAY['user_id'], bucket_count = 50); +INSERT INTO page_views (view_time, user_id, page_url, ds, country)VALUES (TIMESTAMP '2023-12-01 08:15:30', 123457, 'http://example.com/about', DATE '2023-12-01', 'USA'), (TIMESTAMP '2023-12-01 09:20:45', 123458, 'http://example.com/contact', DATE '2023-12-01', 'Canada'), (TIMESTAMP '2023-12-01 10:25:50', 123459, 'http://example.com/blog', DATE '2023-12-01', 'UK'); \ No newline at end of file diff --git a/tests/trino-simple.sql.out b/tests/trino-simple.sql.out new file mode 100644 index 0000000..1305ba1 --- /dev/null +++ b/tests/trino-simple.sql.out @@ -0,0 +1,22 @@ +"catalog_hive" +"catalog_iceberg" +"catalog_mysql" +"catalog_postgres" +"gravitino" +"jmx" +"memory" +"system" +"tpcds" +"tpch" +"CREATE SCHEMA catalog_hive.company +WITH ( + location = 'hdfs://hive:9000/user/hive/warehouse/company.db' +)" +"Sam Evans","55000.00" +"company" +"default" +"information_schema" +"sales" +"name","varchar","","" +"salary","decimal(10,2)","","" +"employees" diff --git a/tests/trino-test.sql b/tests/trino-test.sql new file mode 100644 index 0000000..2d4dc9f --- /dev/null +++ b/tests/trino-test.sql @@ -0,0 +1 @@ +SHOW CATALOGS; \ No newline at end of file diff --git a/tests/trino-test.sql.out b/tests/trino-test.sql.out new file mode 100644 index 0000000..23c0bce --- /dev/null +++ b/tests/trino-test.sql.out @@ -0,0 +1,10 @@ +"catalog_hive" +"catalog_iceberg" +"catalog_mysql" +"catalog_postgres" +"gravitino" +"jmx" +"memory" +"system" +"tpcds" +"tpch" diff --git a/tests/union-spark.sql.out b/tests/union-spark.sql.out new file mode 100644 index 0000000..ed6ca64 --- /dev/null +++ b/tests/union-spark.sql.out @@ -0,0 +1,15 @@ +1 John Doe 30 Engineering +1 Nasim Duke nasimduke@hotmail.net +10 Ronan Joyner ronanjoyner5549@aol.com +11 Rory Brown rory@123.com +12 Jerry Washington jerry@dt.com +2 Jane Smith 2 20 +2 Jane Smith 28 Engineering +2 Perry Tyler perrytyler@outlook.com +3 Leah Swanson leahswanson1069@protonmail.com +4 Mia Hahn miahahn@yahoo.edu +5 Quin Hurst quinhurst5485@google.net +6 Harriet Best harrietbest2890@icloud.com +7 Erasmus Phelps erasmusphelps9105@protonmail.net +8 Lenore Wilder lenorewilder@aol.net +9 Raya Mcguire rayamcguire@hotmail.com diff --git a/tests/union.sql b/tests/union.sql new file mode 100644 index 0000000..1ad0168 --- /dev/null +++ b/tests/union.sql @@ -0,0 +1,7 @@ +select * from catalog_hive.sales.customers +union +select * from catalog_iceberg.sales.customers; + +select * from catalog_hive.product.employees where department = 'Engineering'; + +select e.id, e.name, abc.a, abc.b from catalog_hive.product.employees e join catalog_iceberg.mydb.abc abc on e.id = abc.a; \ No newline at end of file diff --git a/tests/union.sql.out b/tests/union.sql.out new file mode 100644 index 0000000..f2c195c --- /dev/null +++ b/tests/union.sql.out @@ -0,0 +1,15 @@ +"1","John Doe","30","Engineering" +"1","Nasim Duke","nasimduke@hotmail.net" +"10","Ronan Joyner","ronanjoyner5549@aol.com" +"11","Rory Brown","rory@123.com" +"12","Jerry Washington","jerry@dt.com" +"2","Jane Smith","2","20" +"2","Jane Smith","28","Engineering" +"2","Perry Tyler","perrytyler@outlook.com" +"3","Leah Swanson","leahswanson1069@protonmail.com" +"4","Mia Hahn","miahahn@yahoo.edu" +"5","Quin Hurst","quinhurst5485@google.net" +"6","Harriet 
Best","harrietbest2890@icloud.com" +"7","Erasmus Phelps","erasmusphelps9105@protonmail.net" +"8","Lenore Wilder","lenorewilder@aol.net" +"9","Raya Mcguire","rayamcguire@hotmail.com"