diff --git a/.asf.yaml b/.asf.yaml index c5d24103072d..dcab78f6fd9a 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -15,18 +15,20 @@ # github: - description: SeaTunnel is a distributed, high-performance data integration platform for the synchronization and transformation of massive data (offline & real-time). + description: SeaTunnel is a next-generation super high-performance, distributed, massive data integration tool. homepage: https://seatunnel.apache.org/ labels: - data-integration + - change-data-capture + - cdc - high-performance - offline - real-time - - data-pipeline - - sql-engine + - batch + - streaming + - data-ingestion - apache - - seatunnel - - etl-framework + - elt enabled_merge_buttons: squash: true merge: false diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 13a4d4b52d9d..fc1cefae0519 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -20,11 +20,7 @@ on: push: pull_request: branches: - - dev - paths-ignore: - - 'docs/**' - - '**/*.md' - - 'seatunnel-ui/**' + - '**' concurrency: group: backend-${{ github.event.pull_request.number || github.ref }} @@ -32,7 +28,7 @@ concurrency: jobs: license-header: - if: github.repository == '${{github.actor}}/seatunnel' + if: github.repository == 'apache/seatunnel' name: License header runs-on: ubuntu-latest timeout-minutes: 10 @@ -44,7 +40,7 @@ jobs: uses: apache/skywalking-eyes@985866ce7e324454f61e22eb2db2e998db09d6f3 code-style: - if: github.repository == '${{github.actor}}/seatunnel' + if: github.repository == 'apache/seatunnel' name: Code style runs-on: ubuntu-latest timeout-minutes: 10 @@ -56,7 +52,7 @@ jobs: run: ./mvnw --batch-mode --quiet --no-snapshot-updates clean spotless:check dead-link: - if: github.repository == '${{github.actor}}/seatunnel' + if: github.repository != 'apache/seatunnel' name: Dead links runs-on: ubuntu-latest timeout-minutes: 30 @@ -69,7 +65,7 @@ jobs: done sanity-check: - if: github.repository == 
'${{github.actor}}/seatunnel' + if: github.repository != 'apache/seatunnel' name: Sanity check results needs: [ license-header, code-style, dead-link ] runs-on: ubuntu-latest @@ -83,7 +79,7 @@ jobs: changes: runs-on: ubuntu-latest - if: github.repository == '${{github.actor}}/seatunnel' + if: github.repository != 'apache/seatunnel' timeout-minutes: 10 outputs: api: ${{ steps.filter.outputs.api }} @@ -235,7 +231,7 @@ jobs: echo "modules=$build_modules" >> $GITHUB_OUTPUT dependency-license: - if: needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 'true' + if: github.repository != 'apache/seatunnel' && (needs.changes.outputs.api == 'true' || needs.changes.outputs.engine == 'true') name: Dependency licenses needs: [ changes, sanity-check ] runs-on: ubuntu-latest @@ -262,7 +258,7 @@ jobs: unit-test: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' || (needs.changes.outputs.api == 'false' && needs.changes.outputs.ut-modules != '') + if: github.repository != 'apache/seatunnel' && (needs.changes.outputs.api == 'true' || (needs.changes.outputs.api == 'false' && needs.changes.outputs.ut-modules != '')) runs-on: ${{ matrix.os }} strategy: matrix: @@ -293,7 +289,7 @@ jobs: updated-modules-integration-test-part-1: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' + if: github.repository != 'apache/seatunnel' && (needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '') runs-on: ${{ matrix.os }} strategy: matrix: @@ -311,14 +307,14 @@ jobs: - name: run updated modules integration test (part-1) if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' run: | - sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 4 0` + sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 
7 0` ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci env: MAVEN_OPTS: -Xmx2048m updated-modules-integration-test-part-2: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' + if: github.repository != 'apache/seatunnel' && (needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '') runs-on: ${{ matrix.os }} strategy: matrix: @@ -336,7 +332,7 @@ jobs: - name: run updated modules integration test (part-2) if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' run: | - sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 4 1` + sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 7 1` if [ ! -z $sub_modules ]; then ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else @@ -347,7 +343,7 @@ jobs: updated-modules-integration-test-part-3: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' + if: github.repository != 'apache/seatunnel' && (needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '') runs-on: ${{ matrix.os }} strategy: matrix: @@ -365,7 +361,7 @@ jobs: - name: run updated modules integration test (part-3) if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' run: | - sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 4 2` + sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 7 2` if [ ! 
-z $sub_modules ]; then ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else @@ -376,7 +372,7 @@ jobs: updated-modules-integration-test-part-4: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' + if: github.repository != 'apache/seatunnel' && (needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '') runs-on: ${{ matrix.os }} strategy: matrix: @@ -394,7 +390,91 @@ jobs: - name: run updated modules integration test (part-4) if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' run: | - sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 4 3` + sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 7 3` + if [ ! -z $sub_modules ]; then + ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci + else + echo "sub modules is empty, skipping" + fi + env: + MAVEN_OPTS: -Xmx2048m + updated-modules-integration-test-part-5: + needs: [ changes, sanity-check ] + if: github.repository != 'apache/seatunnel' && (needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '') + runs-on: ${{ matrix.os }} + strategy: + matrix: + java: [ '8', '11' ] + os: [ 'ubuntu-latest' ] + timeout-minutes: 90 + steps: + - uses: actions/checkout@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.java }} + distribution: 'temurin' + cache: 'maven' + - name: run updated modules integration test (part-5) + if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' + run: | + sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module 
${{needs.changes.outputs.it-modules}} 7 4` + if [ ! -z $sub_modules ]; then + ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci + else + echo "sub modules is empty, skipping" + fi + env: + MAVEN_OPTS: -Xmx2048m + updated-modules-integration-test-part-6: + needs: [ changes, sanity-check ] + if: github.repository != 'apache/seatunnel' && (needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '') + runs-on: ${{ matrix.os }} + strategy: + matrix: + java: [ '8', '11' ] + os: [ 'ubuntu-latest' ] + timeout-minutes: 90 + steps: + - uses: actions/checkout@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.java }} + distribution: 'temurin' + cache: 'maven' + - name: run updated modules integration test (part-6) + if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' + run: | + sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 7 5` + if [ ! 
-z $sub_modules ]; then + ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci + else + echo "sub modules is empty, skipping" + fi + env: + MAVEN_OPTS: -Xmx2048m + updated-modules-integration-test-part-7: + needs: [ changes, sanity-check ] + if: github.repository != 'apache/seatunnel' && (needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '') + runs-on: ${{ matrix.os }} + strategy: + matrix: + java: [ '8', '11' ] + os: [ 'ubuntu-latest' ] + timeout-minutes: 90 + steps: + - uses: actions/checkout@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.java }} + distribution: 'temurin' + cache: 'maven' + - name: run updated modules integration test (part-7) + if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != '' + run: | + sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 7 6` if [ ! 
-z $sub_modules ]; then ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else @@ -402,10 +482,9 @@ jobs: fi env: MAVEN_OPTS: -Xmx2048m - engine-v2-it: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -429,7 +508,7 @@ jobs: transform-v2-it-part-1: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -453,7 +532,7 @@ jobs: transform-v2-it-part-2: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -477,7 +556,7 @@ jobs: all-connectors-it-1: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -504,7 +583,7 @@ jobs: all-connectors-it-2: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -531,7 +610,7 @@ jobs: all-connectors-it-3: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -558,7 +637,7 @@ jobs: all-connectors-it-4: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -585,7 +664,7 @@ jobs: all-connectors-it-5: 
needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -612,7 +691,7 @@ jobs: all-connectors-it-6: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -639,7 +718,7 @@ jobs: all-connectors-it-7: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -666,7 +745,7 @@ jobs: jdbc-connectors-it-part-1: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -690,7 +769,7 @@ jobs: jdbc-connectors-it-part-2: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -714,7 +793,7 @@ jobs: jdbc-connectors-it-part-3: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -738,7 +817,7 @@ jobs: jdbc-connectors-it-part-4: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -760,9 +839,33 @@ jobs: env: MAVEN_OPTS: -Xmx4096m + jdbc-connectors-it-part-5: + needs: [ changes, sanity-check ] + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' + runs-on: ${{ matrix.os }} + strategy: + matrix: + java: [ 
'8', '11' ] + os: [ 'ubuntu-latest' ] + timeout-minutes: 90 + steps: + - uses: actions/checkout@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.java }} + distribution: 'temurin' + cache: 'maven' + - name: run jdbc connectors integration test (part-5) + if: needs.changes.outputs.api == 'true' + run: | + ./mvnw -B -T 1C verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-jdbc-e2e-part-5 -am -Pci + env: + MAVEN_OPTS: -Xmx4096m + kafka-connector-it: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -786,7 +889,7 @@ jobs: rocketmq-connector-it: needs: [ changes, sanity-check ] - if: needs.changes.outputs.api == 'true' + if: github.repository != 'apache/seatunnel' && needs.changes.outputs.api == 'true' runs-on: ${{ matrix.os }} strategy: matrix: @@ -806,4 +909,4 @@ jobs: run: | ./mvnw -B -T 1C verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-rocketmq-e2e -am -Pci env: - MAVEN_OPTS: -Xmx4096m \ No newline at end of file + MAVEN_OPTS: -Xmx4096m diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml new file mode 100644 index 000000000000..84196ac888ef --- /dev/null +++ b/.github/workflows/notify_test_workflow.yml @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Intentionally has a general name. +# because the test status check created in GitHub Actions +# currently randomly picks any associated workflow. +# So, the name was changed to make sense in that context too. +# See also https://github.community/t/specify-check-suite-when-creating-a-checkrun/118380/10 +name: On pull request update +on: + pull_request_target: + types: [opened, reopened, synchronize] + +jobs: + notify: + name: Notify test workflow + runs-on: ubuntu-20.04 + permissions: + actions: read + checks: write + steps: + - name: "Notify test workflow" + uses: actions/github-script@v6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const endpoint = 'GET /repos/:owner/:repo/actions/workflows/:id/runs?&branch=:branch' + const check_run_endpoint = 'GET /repos/:owner/:repo/commits/:ref/check-runs' + + // TODO: Should use pull_request.user and pull_request.user.repos_url? + // If a different person creates a commit to another forked repo, + // it wouldn't be able to detect. 
+ const params = { + owner: context.payload.pull_request.head.repo.owner.login, + repo: context.payload.pull_request.head.repo.name, + id: 'build_main.yml', + branch: context.payload.pull_request.head.ref, + } + const check_run_params = { + owner: context.payload.pull_request.head.repo.owner.login, + repo: context.payload.pull_request.head.repo.name, + ref: context.payload.pull_request.head.ref, + } + + console.log('Ref: ' + context.payload.pull_request.head.ref) + console.log('SHA: ' + context.payload.pull_request.head.sha) + + // Wait 3 seconds to make sure the fork repository triggered a workflow. + await new Promise(r => setTimeout(r, 3000)) + + let runs + try { + runs = await github.request(endpoint, params) + } catch (error) { + console.error(error) + // Assume that runs were not found. + } + + const name = 'Build' + const head_sha = context.payload.pull_request.head.sha + let status = 'queued' + + if (!runs || runs.data.workflow_runs.length === 0) { + status = 'completed' + const conclusion = 'action_required' + + github.rest.checks.create({ + owner: context.repo.owner, + repo: context.repo.repo, + name: name, + head_sha: head_sha, + status: status, + conclusion: conclusion, + output: { + title: 'Workflow run detection failed', + summary: ` + Unable to detect the workflow run for testing the changes in your PR. + + 1. If you did not enable GitHub Actions in your forked repository, please enable it by clicking the button as shown in the image below. See also [Disabling or limiting GitHub Actions for a repository](https://docs.github.com/en/github/administering-a-repository/disabling-or-limiting-github-actions-for-a-repository) for more details. + 2. It is possible your branch is based on the old \`dev\` branch in Apache SeaTunnel, please sync your branch to the latest master branch. 
For example as below: + \`\`\`bash + git fetch upstream + git rebase upstream/master + git push origin YOUR_BRANCH --force + \`\`\``, + images: [ + { + alt: 'enabling workflows button', + image_url: 'https://raw.githubusercontent.com/apache/spark/master/.github/workflows/images/workflow-enable-button.png' + } + ] + } + }) + } else { + const run_id = runs.data.workflow_runs[0].id + + if (runs.data.workflow_runs[0].head_sha != context.payload.pull_request.head.sha) { + throw new Error('There was a new unsynced commit pushed. Please retrigger the workflow.'); + } + + // Here we get check run ID to provide Check run view instead of Actions view, see also SPARK-37879. + const check_runs = await github.request(check_run_endpoint, check_run_params) + const check_run_head = check_runs.data.check_runs.filter(r => r.name === "Run / Check changes")[0] + + if (check_run_head.head_sha != context.payload.pull_request.head.sha) { + throw new Error('There was a new unsynced commit pushed. Please retrigger the workflow.'); + } + + const check_run_url = 'https://github.com/' + + context.payload.pull_request.head.repo.full_name + + '/runs/' + + check_run_head.id + + const actions_url = 'https://github.com/' + + context.payload.pull_request.head.repo.full_name + + '/actions/runs/' + + run_id + + github.rest.checks.create({ + owner: context.repo.owner, + repo: context.repo.repo, + name: name, + head_sha: head_sha, + status: status, + output: { + title: 'Test results', + summary: '[See test results](' + check_run_url + ')', + text: JSON.stringify({ + owner: context.payload.pull_request.head.repo.owner.login, + repo: context.payload.pull_request.head.repo.name, + run_id: run_id + }) + }, + details_url: actions_url, + }) + } diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml new file mode 100644 index 000000000000..05cf4914a25c --- /dev/null +++ b/.github/workflows/update_build_status.yml @@ -0,0 +1,108 @@ +# +# Licensed to the Apache 
Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: Update build status workflow + +on: + schedule: + - cron: "*/15 * * * *" + +jobs: + update: + name: Update build status + runs-on: ubuntu-20.04 + permissions: + actions: read + checks: write + steps: + - name: "Update build status" + uses: actions/github-script@v6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const endpoint = 'GET /repos/:owner/:repo/pulls?state=:state' + const params = { + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open' + } + + // See https://docs.github.com/en/graphql/reference/enums#mergestatestatus + const maybeReady = ['behind', 'clean', 'draft', 'has_hooks', 'unknown', 'unstable']; + + // Iterate open PRs + for await (const prs of github.paginate.iterator(endpoint,params)) { + // Each page + for await (const pr of prs.data) { + console.log('SHA: ' + pr.head.sha) + console.log(' Mergeable status: ' + pr.mergeable_state) + if (pr.mergeable_state == null || maybeReady.includes(pr.mergeable_state)) { + const checkRuns = await github.request('GET /repos/{owner}/{repo}/commits/{ref}/check-runs', { + owner: context.repo.owner, + repo: context.repo.repo, + ref: pr.head.sha + }) + + // Iterator GitHub Checks in the PR + for await (const cr of 
checkRuns.data.check_runs) { + if (cr.name == 'Build' && cr.conclusion != "action_required") { + // text contains parameters to make request in JSON. + const params = JSON.parse(cr.output.text) + + // Get the workflow run in the forked repository + let run + try { + run = await github.request('GET /repos/{owner}/{repo}/actions/runs/{run_id}', params) + } catch (error) { + console.error(error) + // Run not found. This can happen when the PR author removes GitHub Actions runs or + // disables GitHub Actions. + continue + } + + // Keep syncing the status of the checks + if (run.data.status == 'completed') { + console.log(' Run ' + cr.id + ': set status (' + run.data.status + ') and conclusion (' + run.data.conclusion + ')') + const response = await github.request('PATCH /repos/{owner}/{repo}/check-runs/{check_run_id}', { + owner: context.repo.owner, + repo: context.repo.repo, + check_run_id: cr.id, + output: cr.output, + status: run.data.status, + conclusion: run.data.conclusion, + details_url: run.data.details_url + }) + } else { + console.log(' Run ' + cr.id + ': set status (' + run.data.status + ')') + const response = await github.request('PATCH /repos/{owner}/{repo}/check-runs/{check_run_id}', { + owner: context.repo.owner, + repo: context.repo.repo, + check_run_id: cr.id, + output: cr.output, + status: run.data.status, + details_url: run.data.details_url + }) + } + + break + } + } + } + } + } diff --git a/.gitignore b/.gitignore index 25977068e4fe..74311a0fa057 100644 --- a/.gitignore +++ b/.gitignore @@ -48,4 +48,5 @@ test.conf spark-warehouse *.flattened-pom.xml -seatunnel-examples \ No newline at end of file +seatunnel-examples +/lib/* \ No newline at end of file diff --git a/DISCLAIMER b/DISCLAIMER index fac720f1f3eb..517e33ffafa9 100644 --- a/DISCLAIMER +++ b/DISCLAIMER @@ -1,4 +1,4 @@ -Apache SeaTunnel (incubating) is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator PMC.
+Apache SeaTunnel is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator PMC. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, diff --git a/README.md b/README.md index e7f898bd6594..0bce6778f0f1 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ Please follow this [document](docs/en/contribution/setup.md). * Mail list: **dev@seatunnel.apache.org**. Mail to `dev-subscribe@seatunnel.apache.org`, follow the reply to subscribe the mail list. -* Slack: https://the-asf.slack.com/archives/C053HND1D6X +* Slack: https://s.apache.org/seatunnel-slack * Twitter: https://twitter.com/ASFSeaTunnel * [Bilibili](https://space.bilibili.com/1542095008) (for Chinese users) diff --git a/bin/install-plugin.cmd b/bin/install-plugin.cmd new file mode 100644 index 000000000000..4df77b968caf --- /dev/null +++ b/bin/install-plugin.cmd @@ -0,0 +1,60 @@ +@echo off +REM Licensed to the Apache Software Foundation (ASF) under one or more +REM contributor license agreements. See the NOTICE file distributed with +REM this work for additional information regarding copyright ownership. +REM The ASF licenses this file to You under the Apache License, Version 2.0 +REM (the "License"); you may not use this file except in compliance with +REM the License. You may obtain a copy of the License at +REM +REM http://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, software +REM distributed under the License is distributed on an "AS IS" BASIS, +REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+REM See the License for the specific language governing permissions and +REM limitations under the License. + +REM This script is used to download the connector plug-ins required during the running process. +REM All are downloaded by default. You can also choose what you need. +REM You only need to configure the plug-in name in config\plugin_config. + +REM Get seatunnel home +set "SEATUNNEL_HOME=%~dp0..\" +echo Set SEATUNNEL_HOME to [%SEATUNNEL_HOME%] + +REM Connector default version is 2.3.3, you can also choose a custom version. eg: 2.1.2: install-plugin.cmd 2.1.2 +set "version=2.3.3" +if not "%~1"=="" set "version=%~1" +echo Install hadoop shade jar, usage version is %version% + +REM Create the lib directory +if not exist "%SEATUNNEL_HOME%\lib" ( + mkdir "%SEATUNNEL_HOME%\lib" + echo create lib directory +) + +call "%SEATUNNEL_HOME%\mvnw.cmd" dependency:get -DgroupId="org.apache.seatunnel" -Dclassifier="optional" -DartifactId="seatunnel-hadoop3-3.1.4-uber" -Dversion="%version%" -Ddest="%SEATUNNEL_HOME%\lib" + +echo Install SeaTunnel connectors plugins, usage version is %version% + +REM Create the connectors directory +if not exist "%SEATUNNEL_HOME%\connectors" ( + mkdir "%SEATUNNEL_HOME%\connectors" + echo create connectors directory +) + +REM Create the seatunnel connectors directory (for v2) +if not exist "%SEATUNNEL_HOME%\connectors\seatunnel" ( + mkdir "%SEATUNNEL_HOME%\connectors\seatunnel" + echo create seatunnel connectors directory +) + +for /f "usebackq delims=" %%a in ("%SEATUNNEL_HOME%\config\plugin_config") do ( + set "line=%%a" + setlocal enabledelayedexpansion + if "!line:~0,1!" neq "-" if "!line:~0,1!" neq "#" ( + echo install connector : !line! + call "%SEATUNNEL_HOME%\mvnw.cmd" dependency:get -DgroupId="org.apache.seatunnel" -DartifactId="!line!"
-Dversion="%version%" -Ddest="%SEATUNNEL_HOME%\connectors\seatunnel" + ) + endlocal +) diff --git a/bin/install-plugin.sh b/bin/install-plugin.sh index cddf4ebf68a2..b8a1cca71492 100755 --- a/bin/install-plugin.sh +++ b/bin/install-plugin.sh @@ -23,8 +23,8 @@ # get seatunnel home SEATUNNEL_HOME=$(cd $(dirname $0);cd ../;pwd) -# connector default version is 2.3.1, you can also choose a custom version. eg: 2.1.2: sh install-plugin.sh 2.1.2 -version=2.3.1 +# connector default version is 2.3.3, you can also choose a custom version. eg: 2.1.2: sh install-plugin.sh 2.1.2 +version=2.3.3 if [ -n "$1" ]; then version="$1" diff --git a/config/plugin_config b/config/plugin_config index 95b952b31bf1..0c7e119ecfc4 100644 --- a/config/plugin_config +++ b/config/plugin_config @@ -36,7 +36,7 @@ connector-file-ftp connector-file-hadoop connector-file-local connector-file-oss -connector-file-oss-jindo +connector-file-jindo-oss connector-file-s3 connector-file-sftp connector-google-sheets diff --git a/config/seatunnel-env.cmd b/config/seatunnel-env.cmd new file mode 100644 index 000000000000..79c2d3c117c6 --- /dev/null +++ b/config/seatunnel-env.cmd @@ -0,0 +1,21 @@ +@echo off +REM Licensed to the Apache Software Foundation (ASF) under one or more +REM contributor license agreements. See the NOTICE file distributed with +REM this work for additional information regarding copyright ownership. +REM The ASF licenses this file to You under the Apache License, Version 2.0 +REM (the "License"); you may not use this file except in compliance with +REM the License. You may obtain a copy of the License at +REM +REM http://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, software +REM distributed under the License is distributed on an "AS IS" BASIS, +REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +REM See the License for the specific language governing permissions and +REM limitations under the License. 
+ +REM Home directory of spark distribution. +if "%SPARK_HOME%" == "" set "SPARK_HOME=C:\Program Files\spark" + +REM Home directory of flink distribution. +if "%FLINK_HOME%" == "" set "FLINK_HOME=C:\Program Files\flink" \ No newline at end of file diff --git a/config/seatunnel.yaml b/config/seatunnel.yaml index 7e496ca39adb..5961c839238b 100644 --- a/config/seatunnel.yaml +++ b/config/seatunnel.yaml @@ -17,6 +17,7 @@ seatunnel: engine: + history-job-expire-minutes: 1440 backup-count: 1 queue-type: blockingqueue print-execution-info-interval: 60 @@ -26,8 +27,6 @@ seatunnel: checkpoint: interval: 10000 timeout: 60000 - max-concurrent: 1 - tolerable-failure: 2 storage: type: hdfs max-retained: 3 diff --git a/docs/en/about.md b/docs/en/about.md index d2e28693915a..57a800343b02 100644 --- a/docs/en/about.md +++ b/docs/en/about.md @@ -2,7 +2,7 @@ seatunnel logo -[![Slack](https://img.shields.io/badge/slack-%23seatunnel-4f8eba?logo=slack)](https://the-asf.slack.com/archives/C053HND1D6X) +[![Slack](https://img.shields.io/badge/slack-%23seatunnel-4f8eba?logo=slack)](https://s.apache.org/seatunnel-slack) [![Twitter Follow](https://img.shields.io/twitter/follow/ASFSeaTunnel.svg?label=Follow&logo=twitter)](https://twitter.com/ASFSeaTunnel) SeaTunnel is a very easy-to-use, ultra-high-performance, distributed data integration platform that supports real-time diff --git a/docs/en/connector-v2/formats/cdc-compatible-debezium-json.md b/docs/en/connector-v2/formats/cdc-compatible-debezium-json.md index 002bd0c3bec4..e0751a249272 100644 --- a/docs/en/connector-v2/formats/cdc-compatible-debezium-json.md +++ b/docs/en/connector-v2/formats/cdc-compatible-debezium-json.md @@ -19,7 +19,6 @@ source { MySQL-CDC { result_table_name = "table1" - hostname = localhost base-url="jdbc:mysql://localhost:3306/test" "startup.mode"=INITIAL catalog { diff --git a/docs/en/connector-v2/formats/kafka-compatible-kafkaconnect-json.md b/docs/en/connector-v2/formats/kafka-compatible-kafkaconnect-json.md new 
file mode 100644 index 000000000000..7de8a9e838b2 --- /dev/null +++ b/docs/en/connector-v2/formats/kafka-compatible-kafkaconnect-json.md @@ -0,0 +1,47 @@ +# Kafka source compatible kafka-connect-json + +The SeaTunnel Kafka connector supports parsing data extracted by Kafka Connect sources, especially data extracted by Kafka Connect JDBC and Kafka Connect Debezium. + +# How to use + +## Kafka output to MySQL + +```bash +env { + execution.parallelism = 1 + job.mode = "BATCH" +} + +source { + Kafka { + bootstrap.servers = "localhost:9092" + topic = "jdbc_source_record" + result_table_name = "kafka_table" + start_mode = earliest + schema = { + fields { + id = "int" + name = "string" + description = "string" + weight = "string" + } + }, + format = COMPATIBLE_KAFKA_CONNECT_JSON + } +} + + +sink { + Jdbc { + driver = com.mysql.cj.jdbc.Driver + url = "jdbc:mysql://localhost:3306/seatunnel" + user = st_user + password = seatunnel + generate_sink_sql = true + database = seatunnel + table = jdbc_sink + primary_keys = ["id"] + } +} +``` + diff --git a/docs/en/connector-v2/sink/AmazonDynamoDB.md b/docs/en/connector-v2/sink/AmazonDynamoDB.md index e8fe0b23afbe..6e880fb4af42 100644 --- a/docs/en/connector-v2/sink/AmazonDynamoDB.md +++ b/docs/en/connector-v2/sink/AmazonDynamoDB.md @@ -20,7 +20,6 @@ Write data to Amazon DynamoDB | secret_access_key | string | yes | - | | table | string | yes | - | | batch_size | string | no | 25 | -| batch_interval_ms | string | no | 1000 | | common-options | | no | - | ### url [string] diff --git a/docs/en/connector-v2/sink/Console.md b/docs/en/connector-v2/sink/Console.md index fd7623d7d389..55df281b2752 100644 --- a/docs/en/connector-v2/sink/Console.md +++ b/docs/en/connector-v2/sink/Console.md @@ -14,14 +14,24 @@ Used to send data to Console. Both support streaming and batch mode. 
## Options -| name | type | required | default value | -|----------------|------|----------|---------------| -| common-options | | no | - | +| name | type | required | default value | +|--------------------|---------|----------|---------------| +| common-options | | no | - | +| log.print.data | boolean | no | true | +| log.print.delay.ms | int | no | 0 | ### common options Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details +### log.print.data + +Flag to determine whether data should be printed in the logs. The default value is `true`. + +### log.print.delay.ms + +Delay in milliseconds between printing each data item to the logs. The default value is `0`. + ## Example simple: diff --git a/docs/en/connector-v2/sink/DB2.md b/docs/en/connector-v2/sink/DB2.md index 8f5a7285e35d..fc0aaca0943c 100644 --- a/docs/en/connector-v2/sink/DB2.md +++ b/docs/en/connector-v2/sink/DB2.md @@ -65,8 +65,7 @@ semantics (using XA transaction guarantee). | support_upsert_by_query_primary_key_exist | Boolean | No | false | Choose to use INSERT sql, UPDATE sql to process update events(INSERT, UPDATE_AFTER) based on query primary key exists. This configuration is only used when database unsupport upsert syntax. **Note**: that this method has low performance | | connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. | | max_retries | Int | No | 0 | The number of retries to submit failed (executeBatch) | -| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `batch_interval_ms`
, the data will be flushed into the database | -| batch_interval_ms | Int | No | 1000 | For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the database | +| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `checkpoint.interval`
, the data will be flushed into the database | | is_exactly_once | Boolean | No | false | Whether to enable exactly-once semantics, which will use Xa transactions. If on, you need to
set `xa_data_source_class_name`. | | generate_sink_sql | Boolean | No | false | Generate sql statements based on the database table you want to write to | | xa_data_source_class_name | String | No | - | The xa data source class name of the database Driver, for example, DB2 is `com.db2.cj.jdbc.Db2XADataSource`, and
please refer to appendix for other data sources | diff --git a/docs/en/connector-v2/sink/Feishu.md b/docs/en/connector-v2/sink/Feishu.md index bd45977ce809..5573086db3e4 100644 --- a/docs/en/connector-v2/sink/Feishu.md +++ b/docs/en/connector-v2/sink/Feishu.md @@ -2,41 +2,55 @@ > Feishu sink connector -## Description - -Used to launch Feishu web hooks using data. - -> For example, if the data from upstream is [`age: 12, name: tyrantlucifer`], the body content is the following: `{"age": 12, "name": "tyrantlucifer"}` +## Support Those Engines -**Tips: Feishu sink only support `post json` webhook and the data from source will be treated as body content in web hook.** +> Spark
+> Flink
+> SeaTunnel Zeta
-## Key features +## Key Features - [ ] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [cdc](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|----------------|--------|----------|---------------| -| url | String | Yes | - | -| headers | Map | No | - | -| common-options | | no | - | - -### url [string] - -Feishu webhook url - -### headers [Map] - -Http request headers +## Description -### common options +Used to launch Feishu web hooks using data. -Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details +> For example, if the data from upstream is [`age: 12, name: tyrantlucifer`], the body content is the following: `{"age": 12, "name": "tyrantlucifer"}` -## Example +**Tips: Feishu sink only support `post json` webhook and the data from source will be treated as body content in web hook.** -simple: +## Data Type Mapping + +| Seatunnel Data type | Feishu Data type | +|-----------------------------|------------------| +| ROW
MAP | Json | +| NULL | null | +| BOOLEAN | boolean | +| TINYINT | byte | +| SMALLINT | short | +| INT | int | +| BIGINT | long | +| FLOAT | float | +| DOUBLE | double | +| DECIMAL | BigDecimal | +| BYTES | byte[] | +| STRING | String | +| DATE
TIMESTAMP
TIME | String | +| ARRAY | JsonArray | + +## Sink Options + +| Name | Type | Required | Default | Description | +|----------------|--------|----------|---------|-----------------------------------------------------------------------------------------------------| +| url | String | Yes | - | Feishu webhook url | +| headers | Map | No | - | Http request headers | +| common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | + +## Task Example + +### Simple: ```hocon Feishu { diff --git a/docs/en/connector-v2/sink/HdfsFile.md b/docs/en/connector-v2/sink/HdfsFile.md index 34ce19714b4d..135c5115c2aa 100644 --- a/docs/en/connector-v2/sink/HdfsFile.md +++ b/docs/en/connector-v2/sink/HdfsFile.md @@ -1,20 +1,14 @@ # HdfsFile -> HDFS file sink connector +> HDFS File Sink Connector -## Description - -Output data to hdfs file - -:::tip - -If you use spark/flink, In order to use this connector, You must ensure your spark/flink cluster already integrated hadoop. The tested hadoop version is 2.x. - -If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you download and install SeaTunnel Engine. You can check the jar package under ${SEATUNNEL_HOME}/lib to confirm this. +## Support Those Engines -::: +> Spark
+> Flink
+> SeaTunnel Zeta
-## Key features +## Key Features - [x] [exactly-once](../../concept/connector-v2-features.md) @@ -30,183 +24,120 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] compress codec - [x] lzo -## Options - -| name | type | required | default value | remarks | -|----------------------------------|---------|----------|--------------------------------------------|-----------------------------------------------------------| -| fs.defaultFS | string | yes | - | | -| path | string | yes | - | | -| hdfs_site_path | string | no | - | | -| custom_filename | boolean | no | false | Whether you need custom the filename | -| file_name_expression | string | no | "${transactionId}" | Only used when custom_filename is true | -| filename_time_format | string | no | "yyyy.MM.dd" | Only used when custom_filename is true | -| file_format_type | string | no | "csv" | | -| field_delimiter | string | no | '\001' | Only used when file_format_type is text | -| row_delimiter | string | no | "\n" | Only used when file_format_type is text | -| have_partition | boolean | no | false | Whether you need processing partitions. | -| partition_by | array | no | - | Only used then have_partition is true | -| partition_dir_expression | string | no | "${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/" | Only used then have_partition is true | -| is_partition_field_write_in_file | boolean | no | false | Only used then have_partition is true | -| sink_columns | array | no | | When this parameter is empty, all fields are sink columns | -| is_enable_transaction | boolean | no | true | | -| batch_size | int | no | 1000000 | | -| compress_codec | string | no | none | | -| kerberos_principal | string | no | - | -| kerberos_keytab_path | string | no | - | | -| compress_codec | string | no | none | | -| common-options | object | no | - | | -| max_rows_in_memory | int | no | - | Only used when file_format_type is excel. 
| -| sheet_name | string | no | Sheet${Random number} | Only used when file_format_type is excel. | - -### fs.defaultFS [string] - -The hadoop cluster address that start with `hdfs://`, for example: `hdfs://hadoopcluster` - -### path [string] - -The target dir path is required. - -### hdfs_site_path [string] - -The path of `hdfs-site.xml`, used to load ha configuration of namenodes - -### custom_filename [boolean] - -Whether custom the filename - -### file_name_expression [string] - -Only used when `custom_filename` is `true` - -`file_name_expression` describes the file expression which will be created into the `path`. We can add the variable `${now}` or `${uuid}` in the `file_name_expression`, like `test_${uuid}_${now}`, -`${now}` represents the current time, and its format can be defined by specifying the option `filename_time_format`. - -Please note that, If `is_enable_transaction` is `true`, we will auto add `${transactionId}_` in the head of the file. - -### filename_time_format [string] - -Only used when `custom_filename` is `true` - -When the format in the `file_name_expression` parameter is `xxxx-${now}` , `filename_time_format` can specify the time format of the path, and the default value is `yyyy.MM.dd` . The commonly used time formats are listed as follows: - -| Symbol | Description | -|--------|--------------------| -| y | Year | -| M | Month | -| d | Day of month | -| H | Hour in day (0-23) | -| m | Minute in hour | -| s | Second in minute | - -### file_format_type [string] - -We supported as the following file types: - -`text` `json` `csv` `orc` `parquet` `excel` - -Please note that, The final file name will end with the file_format_type's suffix, the suffix of the text file is `txt`. - -### field_delimiter [string] - -The separator between columns in a row of data. Only needed by `text` file format. - -### row_delimiter [string] - -The separator between rows in a file. Only needed by `text` file format. 
- -### have_partition [boolean] - -Whether you need processing partitions. - -### partition_by [array] - -Only used when `have_partition` is `true`. - -Partition data based on selected fields. - -### partition_dir_expression [string] - -Only used when `have_partition` is `true`. - -If the `partition_by` is specified, we will generate the corresponding partition directory based on the partition information, and the final file will be placed in the partition directory. - -Default `partition_dir_expression` is `${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/`. `k0` is the first partition field and `v0` is the value of the first partition field. - -### is_partition_field_write_in_file [boolean] - -Only used when `have_partition` is `true`. - -If `is_partition_field_write_in_file` is `true`, the partition field and the value of it will be write into data file. - -For example, if you want to write a Hive Data File, Its value should be `false`. - -### sink_columns [array] - -Which columns need be write to file, default value is all of the columns get from `Transform` or `Source`. -The order of the fields determines the order in which the file is actually written. - -### is_enable_transaction [boolean] - -If `is_enable_transaction` is true, we will ensure that data will not be lost or duplicated when it is written to the target directory. - -Please note that, If `is_enable_transaction` is `true`, we will auto add `${transactionId}_` in the head of the file. - -Only support `true` now. - -### batch_size [int] - -The maximum number of rows in a file. For SeaTunnel Engine, the number of lines in the file is determined by `batch_size` and `checkpoint.interval` jointly decide. If the value of `checkpoint.interval` is large enough, sink writer will write rows in a file until the rows in the file larger than `batch_size`. If `checkpoint.interval` is small, the sink writer will create a new file when a new checkpoint trigger. 
- -### compress_codec [string] - -The compress codec of files and the details that supported as the following shown: - -- txt: `lzo` `none` -- json: `lzo` `none` -- csv: `lzo` `none` -- orc: `lzo` `snappy` `lz4` `zlib` `none` -- parquet: `lzo` `snappy` `lz4` `gzip` `brotli` `zstd` `none` - -Tips: excel type does not support any compression format - -### kerberos_principal [string] - -The principal of kerberos - -### kerberos_keytab_path [string] - -The keytab path of kerberos - -### common options - -Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details +## Description -### max_rows_in_memory [int] +Output data to hdfs file -When File Format is Excel,The maximum number of data items that can be cached in the memory. +## Supported DataSource Info + +| Datasource | Supported Versions | +|------------|--------------------| +| HdfsFile | hadoop 2.x and 3.x | + +## Sink Options + +| Name | Type | Required | Default | Description | +|----------------------------------|---------|----------|--------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| fs.defaultFS | string | yes | - | The hadoop cluster address that start with `hdfs://`, for example: `hdfs://hadoopcluster` | +| path | string | yes | - | The target dir path is required. 
| +| hdfs_site_path | string | no | - | The path of `hdfs-site.xml`, used to load ha configuration of namenodes | +| custom_filename | boolean | no | false | Whether you need custom the filename | +| file_name_expression | string | no | "${transactionId}" | Only used when `custom_filename` is `true`.`file_name_expression` describes the file expression which will be created into the `path`. We can add the variable `${now}` or `${uuid}` in the `file_name_expression`, like `test_${uuid}_${now}`,`${now}` represents the current time, and its format can be defined by specifying the option `filename_time_format`.Please note that, If `is_enable_transaction` is `true`, we will auto add `${transactionId}_` in the head of the file. | +| filename_time_format | string | no | "yyyy.MM.dd" | Only used when `custom_filename` is `true`.When the format in the `file_name_expression` parameter is `xxxx-${now}` , `filename_time_format` can specify the time format of the path, and the default value is `yyyy.MM.dd` . The commonly used time formats are listed as follows:[y:Year,M:Month,d:Day of month,H:Hour in day (0-23),m:Minute in hour,s:Second in minute] | +| file_format_type | string | no | "csv" | We supported as the following file types:`text` `json` `csv` `orc` `parquet` `excel`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. | +| field_delimiter | string | no | '\001' | Only used when file_format is text,The separator between columns in a row of data. Only needed by `text` file format. | +| row_delimiter | string | no | "\n" | Only used when file_format is text,The separator between rows in a file. Only needed by `text` file format. | +| have_partition | boolean | no | false | Whether you need processing partitions. | +| partition_by | array | no | - | Only used then have_partition is true,Partition data based on selected fields. 
| +| partition_dir_expression | string | no | "${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/" | Only used then have_partition is true,If the `partition_by` is specified, we will generate the corresponding partition directory based on the partition information, and the final file will be placed in the partition directory. Default `partition_dir_expression` is `${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/`. `k0` is the first partition field and `v0` is the value of the first partition field. | +| is_partition_field_write_in_file | boolean | no | false | Only used when `have_partition` is `true`. If `is_partition_field_write_in_file` is `true`, the partition field and the value of it will be write into data file.For example, if you want to write a Hive Data File, Its value should be `false`. | +| sink_columns | array | no | | When this parameter is empty, all fields are sink columns.Which columns need be write to file, default value is all of the columns get from `Transform` or `Source`. The order of the fields determines the order in which the file is actually written. | +| is_enable_transaction | boolean | no | true | If `is_enable_transaction` is true, we will ensure that data will not be lost or duplicated when it is written to the target directory.Please note that, If `is_enable_transaction` is `true`, we will auto add `${transactionId}_` in the head of the file.Only support `true` now. | +| batch_size | int | no | 1000000 | The maximum number of rows in a file. For SeaTunnel Engine, the number of lines in the file is determined by `batch_size` and `checkpoint.interval` jointly decide. If the value of `checkpoint.interval` is large enough, sink writer will write rows in a file until the rows in the file larger than `batch_size`. If `checkpoint.interval` is small, the sink writer will create a new file when a new checkpoint trigger. 
| +| compress_codec | string | no | none | The compress codec of files and the details that supported as the following shown:[txt: `lzo` `none`,json: `lzo` `none`,csv: `lzo` `none`,orc: `lzo` `snappy` `lz4` `zlib` `none`,parquet: `lzo` `snappy` `lz4` `gzip` `brotli` `zstd` `none`].Tips: excel type does not support any compression format. | +| kerberos_principal | string | no | - | The principal of kerberos | +| kerberos_keytab_path | string | no | - | The keytab path of kerberos | +| compress_codec | string | no | none | compress codec | +| common-options | object | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | +| max_rows_in_memory | int | no | - | Only used when file_format is excel.When File Format is Excel,The maximum number of data items that can be cached in the memory. | +| sheet_name | string | no | Sheet${Random number} | Only used when file_format is excel.Writer the sheet of the workbook | + +### Tips + +> If you use spark/flink, In order to use this connector, You must ensure your spark/flink cluster already integrated hadoop. The tested hadoop version is 2.x. If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you download and install SeaTunnel Engine. You can check the jar package under ${SEATUNNEL_HOME}/lib to confirm this. + +## Task Example + +### Simple: + +> This example defines a SeaTunnel synchronization task that automatically generates data through FakeSource and sends it to Hdfs. 
-### sheet_name [string] +``` +# Defining the runtime environment +env { + # You can set flink configuration here + execution.parallelism = 1 + job.mode = "BATCH" +} -Writer the sheet of the workbook +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + parallelism = 1 + result_table_name = "fake" + row.num = 16 + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_decimal = "decimal(30, 8)" + c_bytes = bytes + c_date = date + c_timestamp = timestamp + } + } + } + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/category/source-v2 +} -## Example +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/category/transform-v2 +} -For orc file format simple config +sink { + HdfsFile { + fs.defaultFS = "hdfs://hadoopcluster" + path = "/tmp/hive/warehouse/test2" + file_format = "orc" + } + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/category/sink-v2 +} +``` -```bash +### For orc file format simple config +``` HdfsFile { fs.defaultFS = "hdfs://hadoopcluster" path = "/tmp/hive/warehouse/test2" - file_format_type = "orc" + file_format = "orc" } - ``` -For text file format with `have_partition` and `custom_filename` and `sink_columns` - -```bash +### For text file format with `have_partition` and `custom_filename` and `sink_columns` +``` HdfsFile { fs.defaultFS = "hdfs://hadoopcluster" path = "/tmp/hive/warehouse/test2" @@ -223,13 +154,11 @@ HdfsFile { sink_columns = ["name","age"] 
is_enable_transaction = true } - ``` -For parquet file format with `have_partition` and `custom_filename` and `sink_columns` - -```bash +### For parquet file format with `have_partition` and `custom_filename` and `sink_columns` +``` HdfsFile { fs.defaultFS = "hdfs://hadoopcluster" path = "/tmp/hive/warehouse/test2" @@ -244,32 +173,27 @@ HdfsFile { sink_columns = ["name","age"] is_enable_transaction = true } - ``` -## Changelog +### For kerberos simple config -### 2.2.0-beta 2022-09-26 - -- Add HDFS File Sink Connector - -### 2.3.0-beta 2022-10-20 - -- [BugFix] Fix the bug of incorrect path in windows environment ([2980](https://github.com/apache/seatunnel/pull/2980)) -- [BugFix] Fix filesystem get error ([3117](https://github.com/apache/seatunnel/pull/3117)) -- [BugFix] Solved the bug of can not parse '\t' as delimiter from config file ([3083](https://github.com/apache/seatunnel/pull/3083)) - -### 2.3.0 2022-12-30 - -- [BugFix] Fixed the following bugs that failed to write data to files ([3258](https://github.com/apache/seatunnel/pull/3258)) - - When field from upstream is null it will throw NullPointerException - - Sink columns mapping failed - - When restore writer from states getting transaction directly failed +``` +HdfsFile { + fs.defaultFS = "hdfs://hadoopcluster" + path = "/tmp/hive/warehouse/test2" + hdfs_site_path = "/path/to/your/hdfs_site_path" + kerberos_principal = "your_principal@EXAMPLE.COM" + kerberos_keytab_path = "/path/to/your/keytab/file.keytab" +} +``` -### Next version +### For compress simple config -- [Improve] Support setting batch size for every file ([3625](https://github.com/apache/seatunnel/pull/3625)) -- [Improve] Support lzo compression for text in file format ([3782](https://github.com/apache/seatunnel/pull/3782)) -- [Improve] Support kerberos authentication ([3840](https://github.com/apache/seatunnel/pull/3840)) -- [Improve] Support file compress ([3899](https://github.com/apache/seatunnel/pull/3899)) +``` +HdfsFile { + fs.defaultFS 
= "hdfs://hadoopcluster" + path = "/tmp/hive/warehouse/test2" + compress_codec = "lzo" +} +``` diff --git a/docs/en/connector-v2/sink/InfluxDB.md b/docs/en/connector-v2/sink/InfluxDB.md index e824a41fe686..1dba1fbe4dc8 100644 --- a/docs/en/connector-v2/sink/InfluxDB.md +++ b/docs/en/connector-v2/sink/InfluxDB.md @@ -22,7 +22,6 @@ Write data to InfluxDB. | key_time | string | no | processing time | | key_tags | array | no | exclude `field` & `key_time` | | batch_size | int | no | 1024 | -| batch_interval_ms | int | no | - | | max_retries | int | no | - | | retry_backoff_multiplier_ms | int | no | - | | connect_timeout_ms | long | no | 15000 | @@ -63,11 +62,7 @@ If not specified, include all fields with `influxDB` measurement field ### batch_size [int] -For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the influxDB - -### batch_interval_ms [int] - -For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the influxDB +For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `checkpoint.interval`, the data will be flushed into the influxDB ### max_retries [int] diff --git a/docs/en/connector-v2/sink/IoTDB.md b/docs/en/connector-v2/sink/IoTDB.md index d60021719e80..554d0bfd06ed 100644 --- a/docs/en/connector-v2/sink/IoTDB.md +++ b/docs/en/connector-v2/sink/IoTDB.md @@ -2,193 +2,190 @@ > IoTDB sink connector -## Description +## Support Those Engines -Used to write data to IoTDB. +> Spark
+> Flink
+> SeaTunnel Zeta
-:::tip - -There is a conflict of thrift version between IoTDB and Spark.Therefore, you need to execute `rm -f $SPARK_HOME/jars/libthrift*` and `cp $IOTDB_HOME/lib/libthrift* $SPARK_HOME/jars/` to resolve it. - -::: - -## Key features +## Key Features - [x] [exactly-once](../../concept/connector-v2-features.md) IoTDB supports the `exactly-once` feature through idempotent writing. If two pieces of data have the same `key` and `timestamp`, the new data will overwrite the old one. -## Options - -| name | type | required | default value | -|-----------------------------|---------|----------|--------------------------------| -| node_urls | list | yes | - | -| username | string | yes | - | -| password | string | yes | - | -| key_device | string | yes | - | -| key_timestamp | string | no | processing time | -| key_measurement_fields | array | no | exclude `device` & `timestamp` | -| storage_group | string | no | - | -| batch_size | int | no | 1024 | -| batch_interval_ms | int | no | - | -| max_retries | int | no | - | -| retry_backoff_multiplier_ms | int | no | - | -| max_retry_backoff_ms | int | no | - | -| default_thrift_buffer_size | int | no | - | -| max_thrift_frame_size | int | no | - | -| zone_id | string | no | - | -| enable_rpc_compression | boolean | no | - | -| connection_timeout_in_ms | int | no | - | -| common-options | | no | - | - -### node_urls [list] - -`IoTDB` cluster address, the format is `["host:port", ...]` - -### username [string] - -`IoTDB` user username - -### password [string] - -`IoTDB` user password - -### key_device [string] - -Specify field name of the `IoTDB` deviceId in SeaTunnelRow - -### key_timestamp [string] - -Specify field-name of the `IoTDB` timestamp in SeaTunnelRow. If not specified, use processing-time as timestamp - -### key_measurement_fields [array] - -Specify field-name of the `IoTDB` measurement list in SeaTunnelRow. 
If not specified, include all fields but exclude `device` & `timestamp` - -### storage_group [string] - -Specify device storage group(path prefix) - -example: deviceId = ${storage_group} + "." + ${key_device} - -### batch_size [int] - -For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the IoTDB - -### batch_interval_ms [int] - -For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the IoTDB - -### max_retries [int] - -The number of retries to flush failed - -### retry_backoff_multiplier_ms [int] - -Using as a multiplier for generating the next delay for backoff - -### max_retry_backoff_ms [int] - -The amount of time to wait before attempting to retry a request to `IoTDB` - -### default_thrift_buffer_size [int] +## Description -Thrift init buffer size in `IoTDB` client +Used to write data to IoTDB. -### max_thrift_frame_size [int] +:::tip -Thrift max frame size in `IoTDB` client +There is a conflict of thrift version between IoTDB and Spark.Therefore, you need to execute `rm -f $SPARK_HOME/jars/libthrift*` and `cp $IOTDB_HOME/lib/libthrift* $SPARK_HOME/jars/` to resolve it. 
-### zone_id [string] +::: -java.time.ZoneId in `IoTDB` client +## Supported DataSource Info + +| Datasource | Supported Versions | Url | +|------------|--------------------|----------------| +| IoTDB | `>= 0.13.0` | localhost:6667 | + +## Database Dependency + +## Data Type Mapping + +| IotDB Data type | SeaTunnel Data type | +|-----------------|---------------------| +| BOOLEAN | BOOLEAN | +| INT32 | TINYINT | +| INT32 | SMALLINT | +| INT32 | INT | +| INT64 | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| TEXT | STRING | + +## Sink Options + +| Name | Type | Required | Default | Description | +|-----------------------------|---------|----------|--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| node_urls | String | Yes | - | `IoTDB` cluster address, the format is `"host1:port"` or `"host1:port,host2:port"` | +| username | String | Yes | - | `IoTDB` user username | +| password | String | Yes | - | `IoTDB` user password | +| key_device | String | Yes | - | Specify field name of the `IoTDB` deviceId in SeaTunnelRow | +| key_timestamp | String | No | processing time | Specify field-name of the `IoTDB` timestamp in SeaTunnelRow. If not specified, use processing-time as timestamp | +| key_measurement_fields | Array | No | exclude `device` & `timestamp` | Specify field-name of the `IoTDB` measurement list in SeaTunnelRow. If not specified, include all fields but exclude `device` & `timestamp` | +| storage_group | Array | No | - | Specify device storage group(path prefix)
example: deviceId = ${storage_group} + "." + ${key_device} | +| batch_size | Integer | No | 1024 | For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `checkpoint.interval`, the data will be flushed into the IoTDB | +| max_retries | Integer | No | - | The number of retries to flush failed | +| retry_backoff_multiplier_ms | Integer | No | - | Using as a multiplier for generating the next delay for backoff | +| max_retry_backoff_ms | Integer | No | - | The amount of time to wait before attempting to retry a request to `IoTDB` | +| default_thrift_buffer_size | Integer | No | - | Thrift init buffer size in `IoTDB` client | +| max_thrift_frame_size | Integer | No | - | Thrift max frame size in `IoTDB` client | +| zone_id | String | No | - | java.time.ZoneId in `IoTDB` client | +| enable_rpc_compression | Boolean | No | - | Enable rpc compression in `IoTDB` client | +| connection_timeout_in_ms | Integer | No | - | The maximum time (in ms) to wait when connecting to `IoTDB` | +| common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | -### enable_rpc_compression [boolean] +## Examples -Enable rpc compression in `IoTDB` client +```hocon +env { + execution.parallelism = 2 + job.mode = "BATCH" +} -### connection_timeout_in_ms [int] +source { + FakeSource { + row.num = 16 + bigint.template = [1664035200001] + schema = { + fields { + device_name = "string" + temperature = "float" + moisture = "int" + event_ts = "bigint" + c_string = "string" + c_boolean = "boolean" + c_tinyint = "tinyint" + c_smallint = "smallint" + c_int = "int" + c_bigint = "bigint" + c_float = "float" + c_double = "double" + } + } + } +} -The maximum time (in ms) to wait when connecting to `IoTDB` +... 
-### common options +``` -Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details +Upstream SeaTunnelRow data format is the following: -## Examples +| device_name | temperature | moisture | event_ts | c_string | c_boolean | c_tinyint | c_smallint | c_int | c_bigint | c_float | c_double | +|--------------------------|-------------|----------|---------------|----------|-----------|-----------|------------|-------|------------|---------|----------| +| root.test_group.device_a | 36.1 | 100 | 1664035200001 | abc1 | true | 1 | 1 | 1 | 2147483648 | 1.0 | 1.0 | +| root.test_group.device_b | 36.2 | 101 | 1664035200001 | abc2 | false | 2 | 2 | 2 | 2147483649 | 2.0 | 2.0 | +| root.test_group.device_c | 36.3 | 102 | 1664035200001 | abc3 | false | 3 | 3 | 3 | 2147483649 | 3.0 | 3.0 | ### Case1 -Common options: +only fill required config. +use current processing time as timestamp. and include all fields but exclude `device` & `timestamp` as measurement fields ```hocon sink { IoTDB { - node_urls = ["localhost:6667"] + node_urls = "localhost:6667" username = "root" password = "root" - batch_size = 1024 - batch_interval_ms = 1000 + key_device = "device_name" # specify the `deviceId` use device_name field } } ``` -When you assign `key_device` is `device_name`, for example: +Output to `IoTDB` data format is the following: + +```shell +IoTDB> SELECT * FROM root.test_group.* align by device; ++------------------------+------------------------+--------------+-----------+--------------+---------+----------+----------+-----------+------+-----------+--------+---------+ +| Time| Device| temperature| moisture| event_ts| c_string| c_boolean| c_tinyint| c_smallint| c_int| c_bigint| c_float| c_double| ++------------------------+------------------------+--------------+-----------+--------------+---------+----------+----------+-----------+------+-----------+--------+---------+ +|2023-09-01T00:00:00.001Z|root.test_group.device_a| 36.1| 100| 
1664035200001| abc1| true| 1| 1| 1| 2147483648| 1.0| 1.0| +|2023-09-01T00:00:00.001Z|root.test_group.device_b| 36.2| 101| 1664035200001| abc2| false| 2| 2| 2| 2147483649| 2.0| 2.0| +|2023-09-01T00:00:00.001Z|root.test_group.device_c| 36.3| 102| 1664035200001| abc2| false| 3| 3| 3| 2147483649| 3.0| 3.0| ++------------------------+------------------------+--------------+-----------+--------------+---------+---------+-----------+-----------+------+-----------+--------+---------+ +``` + +### Case2 + +use source event's time ```hocon sink { IoTDB { - ... - key_device = "device_name" + node_urls = "localhost:6667" + username = "root" + password = "root" + key_device = "device_name" # specify the `deviceId` use device_name field + key_timestamp = "event_ts" # specify the `timestamp` use event_ts field } } ``` -Upstream SeaTunnelRow data format is the following: - -| device_name | field_1 | field_2 | -|--------------------------|---------|---------| -| root.test_group.device_a | 1001 | 1002 | -| root.test_group.device_b | 2001 | 2002 | -| root.test_group.device_c | 3001 | 3002 | - Output to `IoTDB` data format is the following: ```shell IoTDB> SELECT * FROM root.test_group.* align by device; -+------------------------+------------------------+-----------+----------+ -| Time| Device| field_1| field_2| -+------------------------+------------------------+----------+-----------+ -|2022-09-26T17:50:01.201Z|root.test_group.device_a| 1001| 1002| -|2022-09-26T17:50:01.202Z|root.test_group.device_b| 2001| 2002| -|2022-09-26T17:50:01.203Z|root.test_group.device_c| 3001| 3002| -+------------------------+------------------------+----------+-----------+ ++------------------------+------------------------+--------------+-----------+--------------+---------+----------+----------+-----------+------+-----------+--------+---------+ +| Time| Device| temperature| moisture| event_ts| c_string| c_boolean| c_tinyint| c_smallint| c_int| c_bigint| c_float| c_double| 
++------------------------+------------------------+--------------+-----------+--------------+---------+----------+----------+-----------+------+-----------+--------+---------+ +|2022-09-25T00:00:00.001Z|root.test_group.device_a| 36.1| 100| 1664035200001| abc1| true| 1| 1| 1| 2147483648| 1.0| 1.0| +|2022-09-25T00:00:00.001Z|root.test_group.device_b| 36.2| 101| 1664035200001| abc2| false| 2| 2| 2| 2147483649| 2.0| 2.0| +|2022-09-25T00:00:00.001Z|root.test_group.device_c| 36.3| 102| 1664035200001| abc2| false| 3| 3| 3| 2147483649| 3.0| 3.0| ++------------------------+------------------------+--------------+-----------+--------------+---------+---------+-----------+-----------+------+-----------+--------+---------+ ``` -### Case2 +### Case3 -When you assign `key_device`、`key_timestamp`、`key_measurement_fields`, for example: +use source event's time and limit measurement fields ```hocon sink { IoTDB { - ... + node_urls = "localhost:6667" + username = "root" + password = "root" key_device = "device_name" - key_timestamp = "ts" + key_timestamp = "event_ts" key_measurement_fields = ["temperature", "moisture"] } } ``` -Upstream SeaTunnelRow data format is the following: - -| ts | device_name | field_1 | field_2 | temperature | moisture | -|---------------|--------------------------|---------|---------|-------------|----------| -| 1664035200001 | root.test_group.device_a | 1001 | 1002 | 36.1 | 100 | -| 1664035200001 | root.test_group.device_b | 2001 | 2002 | 36.2 | 101 | -| 1664035200001 | root.test_group.device_c | 3001 | 3002 | 36.3 | 102 | - Output to `IoTDB` data format is the following: ```shell diff --git a/docs/en/connector-v2/sink/Jdbc.md b/docs/en/connector-v2/sink/Jdbc.md index 9d68278cf51e..394fadde8018 100644 --- a/docs/en/connector-v2/sink/Jdbc.md +++ b/docs/en/connector-v2/sink/Jdbc.md @@ -41,13 +41,13 @@ support `Xa transactions`. You can set `is_exactly_once=true` to enable it. 
| connection_check_timeout_sec | Int | No | 30 | | max_retries | Int | No | 0 | | batch_size | Int | No | 1000 | -| batch_interval_ms | Int | No | 1000 | | is_exactly_once | Boolean | No | false | | generate_sink_sql | Boolean | No | false | | xa_data_source_class_name | String | No | - | | max_commit_attempts | Int | No | 3 | | transaction_timeout_sec | Int | No | -1 | | auto_commit | Boolean | No | true | +| field_ide | String | No | - | | common-options | | no | - | ### driver [string] @@ -107,12 +107,7 @@ The number of retries to submit failed (executeBatch) ### batch_size[int] -For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `batch_interval_ms` -, the data will be flushed into the database - -### batch_interval_ms[int] - -For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms` +For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `checkpoint.interval` , the data will be flushed into the database ### is_exactly_once[boolean] @@ -142,6 +137,12 @@ exactly-once semantics Automatic transaction commit is enabled by default +### field_ide [String] + +The field "field_ide" is used to identify whether the field needs to be converted to uppercase or lowercase when +synchronizing from the source to the sink. "ORIGINAL" indicates no conversion is needed, "UPPERCASE" indicates +conversion to uppercase, and "LOWERCASE" indicates conversion to lowercase. + ### common options Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details @@ -175,6 +176,7 @@ there are some reference value for params above. 
| Redshift | com.amazon.redshift.jdbc42.Driver | jdbc:redshift://localhost:5439/testdb | com.amazon.redshift.xa.RedshiftXADataSource | https://mvnrepository.com/artifact/com.amazon.redshift/redshift-jdbc42 | | Snowflake | net.snowflake.client.jdbc.SnowflakeDriver | jdbc:snowflake://.snowflakecomputing.com | / | https://mvnrepository.com/artifact/net.snowflake/snowflake-jdbc | | Vertica | com.vertica.jdbc.Driver | jdbc:vertica://localhost:5433 | / | https://repo1.maven.org/maven2/com/vertica/jdbc/vertica-jdbc/12.0.3-0/vertica-jdbc-12.0.3-0.jar | +| Kingbase | com.kingbase8.Driver | jdbc:kingbase8://localhost:54321/db_test | / | https://repo1.maven.org/maven2/cn/com/kingbase/kingbase8/8.6.0/kingbase8-8.6.0.jar | | OceanBase | com.oceanbase.jdbc.Driver | jdbc:oceanbase://localhost:2881 | / | https://repo1.maven.org/maven2/com/oceanbase/oceanbase-client/2.4.3/oceanbase-client-2.4.3.jar | ## Example diff --git a/docs/en/connector-v2/sink/Kingbase.md b/docs/en/connector-v2/sink/Kingbase.md new file mode 100644 index 000000000000..b92b12fc4200 --- /dev/null +++ b/docs/en/connector-v2/sink/Kingbase.md @@ -0,0 +1,168 @@ +# Kingbase + +> JDBC Kingbase Sink Connector + +## Support Connector Version + +- 8.6 + +## Support Those Engines + +> Spark
+> Flink
+> SeaTunnel Zeta
+ +## Key Features + +- [ ] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [cdc](../../concept/connector-v2-features.md) + +## Description + +> Use `Xa transactions` to ensure `exactly-once`. So `exactly-once` is only supported for databases which +> support `Xa transactions`. You can set `is_exactly_once=true` to enable it. Kingbase currently does not support this feature. + +## Supported DataSource Info + +| Datasource | Supported versions | Driver | Url | Maven | +|------------|--------------------|----------------------|------------------------------------------|------------------------------------------------------------------------------------------------| +| Kingbase | 8.6 | com.kingbase8.Driver | jdbc:kingbase8://localhost:54321/db_test | [Download](https://repo1.maven.org/maven2/cn/com/kingbase/kingbase8/8.6.0/kingbase8-8.6.0.jar) | + +## Database Dependency + +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' +> working directory
+> For example: cp kingbase8-8.6.0.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ + +## Data Type Mapping + +| Kingbase Data type | SeaTunnel Data type | +|----------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------| +| BOOL | BOOLEAN | +| INT2 | SHORT | +| SMALLSERIAL
SERIAL
INT4 | INT | +| INT8
BIGSERIAL | BIGINT | +| FLOAT4 | FLOAT | +| FLOAT8 | DOUBLE | +| NUMERIC | DECIMAL((Get the designated column's specified column size),
(Gets the designated column's number of digits to right of the decimal point.))) | +| BPCHAR
CHARACTER
VARCHAR
TEXT | STRING | +| TIMESTAMP | LOCALDATETIME | +| TIME | LOCALTIME | +| DATE | LOCALDATE | +| Other data type | Not supported yet | + +## Sink Options + +| Name | Type | Required | Default | Description | +|-------------------------------------------|---------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:kingbase8://localhost:54321/db_test | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use Kingbase the value is `com.kingbase8.Driver`. | +| user | String | No | - | Connection instance user name | +| password | String | No | - | Connection instance password | +| query | String | No | - | Use this sql write upstream input datas to database. e.g `INSERT ...`,`query` have the higher priority | +| database | String | No | - | Use this `database` and `table-name` auto-generate sql and receive upstream input datas write to database.
This option is mutually exclusive with `query` and has a higher priority. | +| table | String | No | - | Use database and this table-name auto-generate sql and receive upstream input datas write to database.
This option is mutually exclusive with `query` and has a higher priority. | +| primary_keys | Array | No | - | This option is used to support operations such as `insert`, `delete`, and `update` when automatically generate sql. | +| support_upsert_by_query_primary_key_exist | Boolean | No | false | Choose to use INSERT sql, UPDATE sql to process update events(INSERT, UPDATE_AFTER) based on query primary key exists. This configuration is only used when database unsupport upsert syntax. **Note**: that this method has low performance | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. | +| max_retries | Int | No | 0 | The number of retries to submit failed (executeBatch) | +| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `checkpoint.interval`
, the data will be flushed into the database | +| is_exactly_once | Boolean | No | false | Whether to enable exactly-once semantics, which will use Xa transactions. If on, you need to
set `xa_data_source_class_name`. Kingbase currently does not support this. | +| generate_sink_sql | Boolean | No | false | Generate sql statements based on the database table you want to write to | +| xa_data_source_class_name | String | No | - | The xa data source class name of the database Driver. Kingbase currently does not support this. | +| max_commit_attempts | Int | No | 3 | The number of retries for transaction commit failures | +| transaction_timeout_sec | Int | No | -1 | The timeout after the transaction is opened, the default is -1 (never timeout). Note that setting the timeout may affect
exactly-once semantics | +| auto_commit | Boolean | No | true | Automatic transaction commit is enabled by default | +| common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | + +### Tips + +> If partition_column is not set, it will run in single concurrency, and if partition_column is set, it will be executed +> in parallel according to the concurrency of tasks. + +## Task Example + +### Simple: + +> This example defines a SeaTunnel synchronization task that automatically generates data through FakeSource and sends +> it to JDBC Sink. FakeSource generates a total of 16 rows of data (row.num=16), with each row having 12 fields. The final target table is test_table will also be 16 rows of data in the table. +> Before +> run this job, you need create database test and table test_table in your Kingbase. And if you have not yet installed and +> deployed SeaTunnel, you need to follow the instructions in [Install SeaTunnel](../../start-v2/locally/deployment.md) +> to +> install and deploy SeaTunnel. And then follow the instructions +> in [Quick Start With SeaTunnel Engine](../../start-v2/locally/quick-start-seatunnel-engine.md) to run this job. 
+ +``` +# Defining the runtime environment +env { + # You can set flink configuration here + execution.parallelism = 1 + job.mode = "BATCH" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + parallelism = 1 + result_table_name = "fake" + row.num = 16 + schema = { + fields { + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_decimal = "decimal(30, 8)" + c_date = date + c_time = time + c_timestamp = timestamp + } + } + } + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/category/source-v2 +} + +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/category/transform-v2 +} + +sink { + jdbc { + url = "jdbc:kingbase8://127.0.0.1:54321/dbname" + driver = "com.kingbase8.Driver" + user = "root" + password = "123456" + query = "insert into test_table(c_string,c_boolean,c_tinyint,c_smallint,c_int,c_bigint,c_float,c_double,c_decimal,c_date,c_time,c_timestamp) values(?,?,?,?,?,?,?,?,?,?,?,?)" + } + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/category/sink-v2 +} +``` + +### Generate Sink SQL + +> This example not need to write complex sql statements, you can configure the database name table name to automatically +> generate add statements for you + +``` +sink { + jdbc { + url = "jdbc:kingbase8://127.0.0.1:54321/dbname" + driver = "com.kingbase8.Driver" + user = "root" + password = "123456" + # Automatically generate sql statements based on database table names + generate_sink_sql = true + database = test + table = test_table + } +} 
+``` + diff --git a/docs/en/connector-v2/sink/Mysql.md b/docs/en/connector-v2/sink/Mysql.md index 92254c1b54fa..860f071df0e0 100644 --- a/docs/en/connector-v2/sink/Mysql.md +++ b/docs/en/connector-v2/sink/Mysql.md @@ -2,6 +2,10 @@ > JDBC Mysql Sink Connector +## Support Mysql Version + +- 5.5/5.6/5.7/8.0 + ## Support Those Engines > Spark
@@ -67,14 +71,14 @@ semantics (using XA transaction guarantee). | support_upsert_by_query_primary_key_exist | Boolean | No | false | Choose to use INSERT sql, UPDATE sql to process update events(INSERT, UPDATE_AFTER) based on query primary key exists. This configuration is only used when database unsupport upsert syntax. **Note**: that this method has low performance | | connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. | | max_retries | Int | No | 0 | The number of retries to submit failed (executeBatch) | -| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `batch_interval_ms`
, the data will be flushed into the database | -| batch_interval_ms | Int | No | 1000 | For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the database | +| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `checkpoint.interval`
, the data will be flushed into the database | | is_exactly_once | Boolean | No | false | Whether to enable exactly-once semantics, which will use Xa transactions. If on, you need to
set `xa_data_source_class_name`. | | generate_sink_sql | Boolean | No | false | Generate sql statements based on the database table you want to write to | | xa_data_source_class_name | String | No | - | The xa data source class name of the database Driver, for example, mysql is `com.mysql.cj.jdbc.MysqlXADataSource`, and
please refer to appendix for other data sources | | max_commit_attempts | Int | No | 3 | The number of retries for transaction commit failures | | transaction_timeout_sec | Int | No | -1 | The timeout after the transaction is opened, the default is -1 (never timeout). Note that setting the timeout may affect
exactly-once semantics | | auto_commit | Boolean | No | true | Automatic transaction commit is enabled by default | +| field_ide | String | No | - | Identify whether the field needs to be converted when synchronizing from the source to the sink. `ORIGINAL` indicates no conversion is needed;`UPPERCASE` indicates conversion to uppercase;`LOWERCASE` indicates conversion to lowercase. | | common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | ### Tips @@ -119,7 +123,7 @@ transform { sink { jdbc { - url = "jdbc:mysql://localhost:3306/test" + url = "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true" driver = "com.mysql.cj.jdbc.Driver" user = "root" password = "123456" @@ -137,7 +141,7 @@ sink { ``` sink { jdbc { - url = "jdbc:mysql://localhost:3306/test" + url = "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true" driver = "com.mysql.cj.jdbc.Driver" user = "root" password = "123456" @@ -156,7 +160,7 @@ sink { ``` sink { jdbc { - url = "jdbc:mysql://localhost:3306/test" + url = "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true" driver = "com.mysql.cj.jdbc.Driver" max_retries = 0 @@ -178,7 +182,7 @@ sink { ``` sink { jdbc { - url = "jdbc:mysql://localhost:3306/test" + url = "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true" driver = "com.mysql.cj.jdbc.Driver" user = "root" password = "123456" @@ -188,6 +192,7 @@ sink { database = test table = sink_table primary_keys = ["id","name"] + field_ide = UPPERCASE } } ``` diff --git a/docs/en/connector-v2/sink/OceanBase.md b/docs/en/connector-v2/sink/OceanBase.md index ec87ce3d36d1..3cea0b5e6e6d 100644 --- a/docs/en/connector-v2/sink/OceanBase.md +++ b/docs/en/connector-v2/sink/OceanBase.md @@ -81,8 +81,7 @@ Write data through jdbc. 
Support Batch mode and Streaming mode, support concurre | support_upsert_by_query_primary_key_exist | Boolean | No | false | Choose to use INSERT sql, UPDATE sql to process update events(INSERT, UPDATE_AFTER) based on query primary key exists. This configuration is only used when database unsupport upsert syntax. **Note**: that this method has low performance | | connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. | | max_retries | Int | No | 0 | The number of retries to submit failed (executeBatch) | -| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `batch_interval_ms`
, the data will be flushed into the database | -| batch_interval_ms | Int | No | 1000 | For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the database | +| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `checkpoint.interval`
, the data will be flushed into the database | | generate_sink_sql | Boolean | No | false | Generate sql statements based on the database table you want to write to | | max_commit_attempts | Int | No | 3 | The number of retries for transaction commit failures | | transaction_timeout_sec | Int | No | -1 | The timeout after the transaction is opened, the default is -1 (never timeout). Note that setting the timeout may affect
exactly-once semantics | diff --git a/docs/en/connector-v2/sink/Oracle.md b/docs/en/connector-v2/sink/Oracle.md new file mode 100644 index 000000000000..151243f318fb --- /dev/null +++ b/docs/en/connector-v2/sink/Oracle.md @@ -0,0 +1,191 @@ +# Oracle + +> JDBC Oracle Sink Connector + +## Support Those Engines + +> Spark
+> Flink
+> SeaTunnel Zeta
+ +## Key Features + +- [x] [exactly-once](../../concept/connector-v2-features.md) +- [x] [cdc](../../concept/connector-v2-features.md) + +> Use `Xa transactions` to ensure `exactly-once`. So `exactly-once` is only supported for databases which +> support `Xa transactions`. You can set `is_exactly_once=true` to enable it. + +## Description + +Write data through jdbc. Support Batch mode and Streaming mode, support concurrent writing, support exactly-once +semantics (using XA transaction guarantee). + +## Supported DataSource Info + +| Datasource | Supported Versions | Driver | Url | Maven | +|------------|----------------------------------------------------------|--------------------------|----------------------------------------|--------------------------------------------------------------------| +| Oracle | Different dependency version has different driver class. | oracle.jdbc.OracleDriver | jdbc:oracle:thin:@datasource01:1523:xe | https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8 | + +## Database Dependency + +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example Oracle datasource: cp ojdbc8-xxxxxx.jar $SEATUNNEL_HOME/lib/
+> To support the i18n character set, copy the orai18n.jar to the $SEATUNNEL_HOME/lib/ directory. + +## Data Type Mapping + +| Oracle Data type | SeaTunnel Data type | +|--------------------------------------------------------------------------------------|---------------------| +| INTEGER | INT | +| FLOAT | DECIMAL(38, 18) | +| NUMBER(precision <= 9, scale == 0) | INT | +| NUMBER(9 < precision <= 18, scale == 0) | BIGINT | +| NUMBER(18 < precision, scale == 0) | DECIMAL(38, 0) | +| NUMBER(scale != 0) | DECIMAL(38, 18) | +| BINARY_DOUBLE | DOUBLE | +| BINARY_FLOAT
REAL | FLOAT | +| CHAR
NCHAR
NVARCHAR2
VARCHAR2
LONG
ROWID
NCLOB
CLOB
| STRING | +| DATE | DATE | +| TIMESTAMP
TIMESTAMP WITH LOCAL TIME ZONE | TIMESTAMP | +| BLOB
RAW
LONG RAW
BFILE | BYTES | + +## Options + +| Name | Type | Required | Default | Description | +|-------------------------------------------|---------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:oracle:thin:@datasource01:1523:xe | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use Oracle the value is `oracle.jdbc.OracleDriver`. | +| user | String | No | - | Connection instance user name | +| password | String | No | - | Connection instance password | +| query | String | No | - | Use this sql write upstream input datas to database. e.g `INSERT ...`,`query` have the higher priority | +| database | String | No | - | Use this `database` and `table-name` auto-generate sql and receive upstream input datas write to database.
This option is mutually exclusive with `query` and has a higher priority. | +| table | String | No | - | Use database and this table-name auto-generate sql and receive upstream input datas write to database.
This option is mutually exclusive with `query` and has a higher priority. | +| primary_keys | Array | No | - | This option is used to support operations such as `insert`, `delete`, and `update` when automatically generate sql. | +| support_upsert_by_query_primary_key_exist | Boolean | No | false | Choose to use INSERT sql, UPDATE sql to process update events(INSERT, UPDATE_AFTER) based on query primary key exists. This configuration is only used when database unsupport upsert syntax. **Note**: that this method has low performance | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. | +| max_retries | Int | No | 0 | The number of retries to submit failed (executeBatch) | +| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `batch_interval_ms`
, the data will be flushed into the database | +| batch_interval_ms | Int | No | 1000 | For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the database | +| is_exactly_once | Boolean | No | false | Whether to enable exactly-once semantics, which will use Xa transactions. If on, you need to
set `xa_data_source_class_name`. | +| generate_sink_sql | Boolean | No | false | Generate sql statements based on the database table you want to write to. | +| xa_data_source_class_name | String | No | - | The xa data source class name of the database Driver, for example, Oracle is `oracle.jdbc.xa.client.OracleXADataSource`, and
please refer to appendix for other data sources | +| max_commit_attempts | Int | No | 3 | The number of retries for transaction commit failures | +| transaction_timeout_sec | Int | No | -1 | The timeout after the transaction is opened, the default is -1 (never timeout). Note that setting the timeout may affect
exactly-once semantics | +| auto_commit | Boolean | No | true | Automatic transaction commit is enabled by default | +| common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | + +### Tips + +> If partition_column is not set, it will run in single concurrency, and if partition_column is set, it will be executed in parallel according to the concurrency of tasks. + +## Task Example + +### Simple: + +> This example defines a SeaTunnel synchronization task that automatically generates data through FakeSource and sends it to JDBC Sink. FakeSource generates a total of 16 rows of data (row.num=16), with each row having two fields, name (string type) and age (int type). The final target table test_table will also contain 16 rows of data. Before running this job, you need to create database test and table test_table in your Oracle. And if you have not yet installed and deployed SeaTunnel, you need to follow the instructions in [Install SeaTunnel](../../start-v2/locally/deployment.md) to install and deploy SeaTunnel. And then follow the instructions in [Quick Start With SeaTunnel Engine](../../start-v2/locally/quick-start-seatunnel-engine.md) to run this job.
+ +``` +# Defining the runtime environment +env { + # You can set flink configuration here + execution.parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + parallelism = 1 + result_table_name = "fake" + row.num = 16 + schema = { + fields { + name = "string" + age = "int" + } + } + } + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/category/source-v2 +} + +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/category/transform-v2 +} + +sink { + jdbc { + url = "jdbc:oracle:thin:@datasource01:1523:xe" + driver = "oracle.jdbc.OracleDriver" + user = root + password = 123456 + query = "INSERT INTO TEST.TEST_TABLE(NAME,AGE) VALUES(?,?)" + } + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/category/sink-v2 +} +``` + +### Generate Sink SQL + +> This example not need to write complex sql statements, you can configure the database name table name to automatically generate add statements for you + +``` +sink { + Jdbc { + url = "jdbc:oracle:thin:@datasource01:1523:xe" + driver = "oracle.jdbc.OracleDriver" + user = root + password = 123456 + + generate_sink_sql = true + database = XE + table = "TEST.TEST_TABLE" + } +} +``` + +### Exactly-once : + +> For accurate write scene we guarantee accurate once + +``` +sink { + jdbc { + url = "jdbc:oracle:thin:@datasource01:1523:xe" + driver = "oracle.jdbc.OracleDriver" + + max_retries = 0 + user = root + password = 123456 + query = "INSERT INTO TEST.TEST_TABLE(NAME,AGE) VALUES(?,?)" + + is_exactly_once = "true" + + xa_data_source_class_name = "oracle.jdbc.xa.client.OracleXADataSource" + } +} +``` + +### CDC(Change Data Capture) Event + +> CDC change data is also 
supported by us. In this case, you need to configure database, table and primary_keys. + +``` +sink { + jdbc { + url = "jdbc:oracle:thin:@datasource01:1523:xe" + driver = "oracle.jdbc.OracleDriver" + user = root + password = 123456 + + generate_sink_sql = true + # You need to configure both database and table + database = XE + table = "TEST.TEST_TABLE" + primary_keys = ["ID"] + } +} +``` + diff --git a/docs/en/connector-v2/sink/PostgreSql.md b/docs/en/connector-v2/sink/PostgreSql.md index f7d6469b60fc..bcc5616f5ea1 100644 --- a/docs/en/connector-v2/sink/PostgreSql.md +++ b/docs/en/connector-v2/sink/PostgreSql.md @@ -36,34 +36,34 @@ semantics (using XA transaction guarantee). ## Data Type Mapping -| PostgreSQL Data type | SeaTunnel Data type | -|----------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------| -| BOOL
| BOOLEAN | -| _BOOL
| ARRAY<BOOLEAN> | -| BYTEA
| BYTES | -| _BYTEA
| ARRAY<TINYINT> | -| INT2
SMALLSERIAL
INT4
SERIAL
| INT | -| _INT2
_INT4
| ARRAY<INT> | -| INT8
BIGSERIAL
| BIGINT | -| _INT8
| ARRAY<BIGINT> | -| FLOAT4
| FLOAT | -| _FLOAT4
| ARRAY<FLOAT> | -| FLOAT8
| DOUBLE | -| _FLOAT8
| ARRAY<DOUBLE> | -| NUMERIC(Get the designated column's specified column size>0) | DECIMAL(Get the designated column's specified column size,Gets the number of digits in the specified column to the right of the decimal point) | -| NUMERIC(Get the designated column's specified column size<0) | DECIMAL(38, 18) | -| BPCHAR
CHARACTER
VARCHAR
TEXT
GEOMETRY
GEOGRAPHY | STRING | -| _BPCHAR
_CHARACTER
_VARCHAR
_TEXT | ARRAY<STRING> | -| TIMESTAMP
| TIMESTAMP | -| TIME
| TIME | -| DATE
| DATE | -| OTHER DATA TYPES | NOT SUPPORTED YET | +| PostgreSQL Data type | SeaTunnel Data type | +|-----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------| +| BOOL
| BOOLEAN | +| _BOOL
| ARRAY<BOOLEAN> | +| BYTEA
| BYTES | +| _BYTEA
| ARRAY<TINYINT> | +| INT2
SMALLSERIAL
INT4
SERIAL
| INT | +| _INT2
_INT4
| ARRAY<INT> | +| INT8
BIGSERIAL
| BIGINT | +| _INT8
| ARRAY<BIGINT> | +| FLOAT4
| FLOAT | +| _FLOAT4
| ARRAY<FLOAT> | +| FLOAT8
| DOUBLE | +| _FLOAT8
| ARRAY<DOUBLE> | +| NUMERIC(Get the designated column's specified column size>0) | DECIMAL(Get the designated column's specified column size,Gets the number of digits in the specified column to the right of the decimal point) | +| NUMERIC(Get the designated column's specified column size<0) | DECIMAL(38, 18) | +| BPCHAR
CHARACTER
VARCHAR
TEXT
GEOMETRY
GEOGRAPHY
JSON
JSONB | STRING | +| _BPCHAR
_CHARACTER
_VARCHAR
_TEXT | ARRAY<STRING> | +| TIMESTAMP
| TIMESTAMP | +| TIME
| TIME | +| DATE
| DATE | +| OTHER DATA TYPES | NOT SUPPORTED YET | ## Options | Name | Type | Required | Default | Description | |-------------------------------------------|---------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost:5432/test | +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost:5432/test
If you insert json or jsonb data, add the stringtype=unspecified option to the JDBC URL | | driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use PostgreSQL the value is `org.postgresql.Driver`. | | user | String | No | - | Connection instance user name | | password | String | No | - | Connection instance password | @@ -74,14 +74,14 @@ semantics (using XA transaction guarantee). | support_upsert_by_query_primary_key_exist | Boolean | No | false | Choose to use INSERT sql, UPDATE sql to process update events(INSERT, UPDATE_AFTER) based on query primary key exists. This configuration is only used when database unsupport upsert syntax. **Note**: that this method has low performance | | connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. | | max_retries | Int | No | 0 | The number of retries to submit failed (executeBatch) | -| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `batch_interval_ms`
, the data will be flushed into the database | -| batch_interval_ms | Int | No | 1000 | For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the database | +| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `checkpoint.interval`
, the data will be flushed into the database | | is_exactly_once | Boolean | No | false | Whether to enable exactly-once semantics, which will use Xa transactions. If on, you need to
set `xa_data_source_class_name`. | | generate_sink_sql | Boolean | No | false | Generate sql statements based on the database table you want to write to. | | xa_data_source_class_name | String | No | - | The xa data source class name of the database Driver, for example, PostgreSQL is `org.postgresql.xa.PGXADataSource`, and
please refer to appendix for other data sources | | max_commit_attempts | Int | No | 3 | The number of retries for transaction commit failures | | transaction_timeout_sec | Int | No | -1 | The timeout after the transaction is opened, the default is -1 (never timeout). Note that setting the timeout may affect
exactly-once semantics | | auto_commit | Boolean | No | true | Automatic transaction commit is enabled by default | +| field_ide | String | No | - | Identify whether the field needs to be converted when synchronizing from the source to the sink. `ORIGINAL` indicates no conversion is needed;`UPPERCASE` indicates conversion to uppercase;`LOWERCASE` indicates conversion to lowercase. | | common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | ### Tips @@ -125,6 +125,7 @@ transform { sink { jdbc { + # if you would use json or jsonb type insert please add jdbc url stringtype=unspecified option url = "jdbc:postgresql://localhost:5432/test" driver = "org.postgresql.Driver" user = root @@ -143,6 +144,7 @@ sink { ``` sink { Jdbc { + # if you would use json or jsonb type insert please add jdbc url stringtype=unspecified option url = "jdbc:postgresql://localhost:5432/test" driver = org.postgresql.Driver user = root @@ -162,6 +164,7 @@ sink { ``` sink { jdbc { + # if you would use json or jsonb type insert please add jdbc url stringtype=unspecified option url = "jdbc:postgresql://localhost:5432/test" driver = "org.postgresql.Driver" @@ -184,6 +187,7 @@ sink { ``` sink { jdbc { + # if you would use json or jsonb type insert please add jdbc url stringtype=unspecified option url = "jdbc:postgresql://localhost:5432/test" driver = "org.postgresql.Driver" user = root @@ -194,6 +198,7 @@ sink { database = test table = sink_table primary_keys = ["id","name"] + field_ide = UPPERCASE } } ``` diff --git a/docs/en/connector-v2/sink/Redis.md b/docs/en/connector-v2/sink/Redis.md index fcface7da22a..7d2ef237e1ce 100644 --- a/docs/en/connector-v2/sink/Redis.md +++ b/docs/en/connector-v2/sink/Redis.md @@ -23,6 +23,7 @@ Used to write data to Redis. 
| mode | string | no | single | | nodes | list | yes when mode=cluster | - | | format | string | no | json | +| expire | long | no | -1 | | common-options | | no | - | ### host [string] @@ -120,6 +121,10 @@ Connector will generate data as the following and write it to redis: ``` +### expire [long] + +Set the Redis key expiration time, in seconds. The default value is -1, which means keys do not expire automatically. + ### common options Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details diff --git a/docs/en/connector-v2/sink/S3File.md b/docs/en/connector-v2/sink/S3File.md index 7841afdf04e3..4bb670ae38c8 100644 --- a/docs/en/connector-v2/sink/S3File.md +++ b/docs/en/connector-v2/sink/S3File.md @@ -1,24 +1,17 @@ # S3File -> S3 file sink connector +> S3 File Sink Connector -## Description - -Output data to aws s3 file system. - -:::tip +## Support Those Engines -If you use spark/flink, In order to use this connector, You must ensure your spark/flink cluster already integrated hadoop. The tested hadoop version is 2.x. +> Spark
+> Flink
+> SeaTunnel Zeta
-If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you download and install SeaTunnel Engine. You can check the jar package under ${SEATUNNEL_HOME}/lib to confirm this. - -To use this connector you need put hadoop-aws-3.1.4.jar and aws-java-sdk-bundle-1.11.271.jar in ${SEATUNNEL_HOME}/lib dir. - -::: - -## Key features +## Key Features - [x] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [cdc](../../concept/connector-v2-features.md) By default, we use 2PC commit to ensure `exactly-once` @@ -30,59 +23,100 @@ By default, we use 2PC commit to ensure `exactly-once` - [x] json - [x] excel -## Options - -| name | type | required | default value | remarks | -|----------------------------------|---------|----------|-------------------------------------------------------|--------------------------------------------------------------------------------------------------------| -| path | string | yes | - | | -| bucket | string | yes | - | | -| fs.s3a.endpoint | string | yes | - | | -| fs.s3a.aws.credentials.provider | string | yes | com.amazonaws.auth.InstanceProfileCredentialsProvider | | -| access_key | string | no | - | Only used when fs.s3a.aws.credentials.provider = org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider | -| access_secret | string | no | - | Only used when fs.s3a.aws.credentials.provider = org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider | -| custom_filename | boolean | no | false | Whether you need custom the filename | -| file_name_expression | string | no | "${transactionId}" | Only used when custom_filename is true | -| filename_time_format | string | no | "yyyy.MM.dd" | Only used when custom_filename is true | -| file_format_type | string | no | "csv" | | -| field_delimiter | string | no | '\001' | Only used when file_format_type is text | -| row_delimiter | string | no | "\n" | Only used when file_format_type is text | -| have_partition | boolean | no | false | Whether you need processing partitions. 
| -| partition_by | array | no | - | Only used then have_partition is true | -| partition_dir_expression | string | no | "${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/" | Only used then have_partition is true | -| is_partition_field_write_in_file | boolean | no | false | Only used then have_partition is true | -| sink_columns | array | no | | When this parameter is empty, all fields are sink columns | -| is_enable_transaction | boolean | no | true | | -| batch_size | int | no | 1000000 | | -| compress_codec | string | no | none | | -| common-options | object | no | - | | -| max_rows_in_memory | int | no | - | Only used when file_format_type is excel. | -| sheet_name | string | no | Sheet${Random number} | Only used when file_format_type is excel. | - -### path [string] - -The target dir path is required. - -### bucket [string] - -The bucket address of s3 file system, for example: `s3n://seatunnel-test`, if you use `s3a` protocol, this parameter should be `s3a://seatunnel-test`. - -### fs.s3a.endpoint [string] - -fs s3a endpoint - -### fs.s3a.aws.credentials.provider [string] - -The way to authenticate s3a. We only support `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` and `com.amazonaws.auth.InstanceProfileCredentialsProvider` now. - -More information about the credential provider you can see [Hadoop AWS Document](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html#Simple_name.2Fsecret_credentials_with_SimpleAWSCredentialsProvider.2A) - -### access_key [string] - -The access key of s3 file system. If this parameter is not set, please confirm that the credential provider chain can be authenticated correctly, you could check this [hadoop-aws](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) +## Description -### access_secret [string] +Output data to aws s3 file system. -The access secret of s3 file system. 
If this parameter is not set, please confirm that the credential provider chain can be authenticated correctly, you could check this [hadoop-aws](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) +## Supported DataSource Info + +| Datasource | Supported Versions | +|------------|--------------------| +| S3 | current | + +## Database Dependency + +> If you use spark/flink, In order to use this connector, You must ensure your spark/flink cluster already integrated hadoop. The tested hadoop version is 2.x. +> +> If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you download and install SeaTunnel Engine. You can check the jar package under `${SEATUNNEL_HOME}/lib` to confirm this. +> To use this connector you need put `hadoop-aws-3.1.4.jar` and `aws-java-sdk-bundle-1.11.271.jar` in `${SEATUNNEL_HOME}/lib` dir. + +## Data Type Mapping + +If write to `csv`, `text` file type, All column will be string. + +### Orc File Type + +| SeaTunnel Data type | Orc Data type | +|----------------------|-----------------------| +| STRING | STRING | +| BOOLEAN | BOOLEAN | +| TINYINT | BYTE | +| SMALLINT | SHORT | +| INT | INT | +| BIGINT | LONG | +| FLOAT | FLOAT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| DECIMAL | DECIMAL | +| BYTES | BINARY | +| DATE | DATE | +| TIME
TIMESTAMP | TIMESTAMP | +| ROW | STRUCT | +| NULL | UNSUPPORTED DATA TYPE | +| ARRAY | LIST | +| Map | Map | + +### Parquet File Type + +| SeaTunnel Data type | Parquet Data type | +|----------------------|-----------------------| +| STRING | STRING | +| BOOLEAN | BOOLEAN | +| TINYINT | INT_8 | +| SMALLINT | INT_16 | +| INT | INT32 | +| BIGINT | INT64 | +| FLOAT | FLOAT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| DECIMAL | DECIMAL | +| BYTES | BINARY | +| DATE | DATE | +| TIME
TIMESTAMP | TIMESTAMP_MILLIS | +| ROW | GroupType | +| NULL | UNSUPPORTED DATA TYPE | +| ARRAY | LIST | +| Map | Map | + +## Sink Options + +| name | type | required | default value | Description | +|----------------------------------|---------|----------|-------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| path | string | yes | - | | +| bucket | string | yes | - | | +| fs.s3a.endpoint | string | yes | - | | +| fs.s3a.aws.credentials.provider | string | yes | com.amazonaws.auth.InstanceProfileCredentialsProvider | The way to authenticate s3a. We only support `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` and `com.amazonaws.auth.InstanceProfileCredentialsProvider` now. | +| access_key | string | no | - | Only used when fs.s3a.aws.credentials.provider = org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider | +| access_secret | string | no | - | Only used when fs.s3a.aws.credentials.provider = org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider | +| custom_filename | boolean | no | false | Whether you need custom the filename | +| file_name_expression | string | no | "${transactionId}" | Only used when custom_filename is true | +| filename_time_format | string | no | "yyyy.MM.dd" | Only used when custom_filename is true | +| file_format_type | string | no | "csv" | | +| field_delimiter | string | no | '\001' | Only used when file_format is text | +| row_delimiter | string | no | "\n" | Only used when file_format is text | +| have_partition | boolean | no | false | Whether you need processing partitions. 
| +| partition_by | array | no | - | Only used when have_partition is true | +| partition_dir_expression | string | no | "${k0}=${v0}/${k1}=${v1}/.../${kn}=${vn}/" | Only used when have_partition is true | +| is_partition_field_write_in_file | boolean | no | false | Only used when have_partition is true | +| sink_columns | array | no | | When this parameter is empty, all fields are sink columns | +| is_enable_transaction | boolean | no | true | | +| batch_size | int | no | 1000000 | | +| compress_codec | string | no | none | | +| common-options | object | no | - | | +| max_rows_in_memory | int | no | - | Only used when file_format is excel. | +| sheet_name | string | no | Sheet${Random number} | Only used when file_format is excel. | +| hadoop_s3_properties | map | no | | If you need to add a other option, you could add it here and refer to this [link](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) | +| | ### hadoop_s3_properties [map] @@ -208,6 +242,83 @@ Writer the sheet of the workbook ## Example +### Simple: + +> This example defines a SeaTunnel synchronization task that automatically generates data through FakeSource and sends it to S3File Sink. FakeSource generates a total of 16 rows of data (row.num=16), with each row having two fields, name (string type) and age (int type). The final target s3 dir will also create a file and all of the data in write in it. +> Before run this job, you need create s3 path: /seatunnel/text. And if you have not yet installed and deployed SeaTunnel, you need to follow the instructions in [Install SeaTunnel](../../start-v2/locally/deployment.md) to install and deploy SeaTunnel. And then follow the instructions in [Quick Start With SeaTunnel Engine](../../start-v2/locally/quick-start-seatunnel-engine.md) to run this job. 
+ +``` +# Defining the runtime environment +env { + # You can set flink configuration here + execution.parallelism = 1 + job.mode = "BATCH" +} + +source { + # This is an example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + parallelism = 1 + result_table_name = "fake" + row.num = 16 + schema = { + fields { + c_map = "map<string, array<int>>" + c_array = "array<int>" + name = string + c_boolean = boolean + age = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_decimal = "decimal(16, 1)" + c_null = "null" + c_bytes = bytes + c_date = date + c_timestamp = timestamp + } + } + } + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/category/source-v2 +} + +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/category/transform-v2 +} + +sink { + S3File { + bucket = "s3a://seatunnel-test" + tmp_path = "/tmp/seatunnel" + path="/seatunnel/text" + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider="com.amazonaws.auth.InstanceProfileCredentialsProvider" + file_format_type = "text" + field_delimiter = "\t" + row_delimiter = "\n" + have_partition = true + partition_by = ["age"] + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + custom_filename = true + file_name_expression = "${transactionId}_${now}" + filename_time_format = "yyyy.MM.dd" + sink_columns = ["name","age"] + is_enable_transaction=true + hadoop_s3_properties { + "fs.s3a.buffer.dir" = "/data/st_test/s3a" + "fs.s3a.fast.upload.buffer" = "disk" + } + } + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to 
https://seatunnel.apache.org/docs/category/sink-v2 +}
+``` + For text file format with `have_partition` and `custom_filename` and `sink_columns` and `com.amazonaws.auth.InstanceProfileCredentialsProvider` ```hocon diff --git a/docs/en/connector-v2/sink/SelectDB-Cloud.md b/docs/en/connector-v2/sink/SelectDB-Cloud.md index 24d22d5a2d04..6ad2997903bd 100644 --- a/docs/en/connector-v2/sink/SelectDB-Cloud.md +++ b/docs/en/connector-v2/sink/SelectDB-Cloud.md @@ -2,139 +2,169 @@ > SelectDB Cloud sink connector -## Description +## Support Those Engines -Used to send data to SelectDB Cloud. Both support streaming and batch mode. -The internal implementation of SelectDB Cloud sink connector upload after batch caching and commit the CopyInto sql to load data into the table. +> Spark
+> Flink
+> SeaTunnel Zeta
-:::tip - -Version Supported - -* supported `SelectDB Cloud version is >= 2.2.x` - -::: - -## Key features +## Key Features - [x] [exactly-once](../../concept/connector-v2-features.md) - [x] [cdc](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|--------------------|--------|----------|------------------------| -| load-url | string | yes | - | -| jdbc-url | string | yes | - | -| cluster-name | string | yes | - | -| username | string | yes | - | -| password | string | yes | - | -| table.identifier | string | yes | - | -| sink.enable-delete | bool | no | false | -| selectdb.config | map | yes | - | -| sink.buffer-size | int | no | 10 * 1024 * 1024 (1MB) | -| sink.buffer-count | int | no | 10000 | -| sink.max-retries | int | no | 3 | - -### load-url [string] - -`SelectDB Cloud` warehouse http address, the format is `warehouse_ip:http_port` - -### jdbc-url [string] - -`SelectDB Cloud` warehouse jdbc address, the format is `warehouse_ip:mysql_port` - -### cluster-name [string] - -`SelectDB Cloud` cluster name - -### username [string] - -`SelectDB Cloud` user username - -### password [string] - -`SelectDB Cloud` user password - -### table.identifier [string] - -The name of `SelectDB Cloud` table, the format is `database.table` +## Description -### sink.enable-delete [string] +Used to send data to SelectDB Cloud. Both support streaming and batch mode. +The internal implementation of SelectDB Cloud sink connector upload after batch caching and commit the CopyInto sql to load data into the table. -Whether to enable deletion. This option requires SelectDB Cloud table to enable batch delete function, and only supports Unique model. 
+## Supported DataSource Info -`ALTER TABLE example_db.my_table ENABLE FEATURE "BATCH_DELETE";` +:::tip -### selectdb.config [map] +Version Supported -Write property configuration +* supported `SelectDB Cloud version is >= 2.2.x` -CSV Write: +::: -``` -selectdb.config { - file.type="csv" - file.column_separator="," - file.line_delimiter="\n" +## Sink Options + +| Name | Type | Required | Default | Description | +|--------------------|--------|----------|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------| +| load-url | String | Yes | - | `SelectDB Cloud` warehouse http address, the format is `warehouse_ip:http_port` | +| jdbc-url | String | Yes | - | `SelectDB Cloud` warehouse jdbc address, the format is `warehouse_ip:mysql_port` | +| cluster-name | String | Yes | - | `SelectDB Cloud` cluster name | +| username | String | Yes | - | `SelectDB Cloud` user username | +| password | String | Yes | - | `SelectDB Cloud` user password | +| table.identifier | String | Yes | - | The name of `SelectDB Cloud` table, the format is `database.table` | +| sink.enable-delete | bool | No | false | Whether to enable deletion. This option requires SelectDB Cloud table to enable batch delete function, and only supports Unique model. | +| sink.max-retries | int | No | 3 | the max retry times if writing records to database failed | +| sink.buffer-size | int | No | 10 * 1024 * 1024 (10MB) | the buffer size to cache data for stream load. | +| sink.buffer-count | int | No | 10000 | the buffer count to cache data for stream load. | +| selectdb.config | map | yes | - | This option is used to support operations such as `insert`, `delete`, and `update` when automatically generating sql, and supported formats.
| + +## Data Type Mapping + +| SelectDB Cloud Data type | SeaTunnel Data type | +|--------------------------|-----------------------------------------| +| BOOLEAN | BOOLEAN | +| TINYINT | TINYINT | +| SMALLINT | SMALLINT
TINYINT | +| INT | INT
SMALLINT
TINYINT | +| BIGINT | BIGINT
INT
SMALLINT
TINYINT | +| LARGEINT | BIGINT
INT
SMALLINT
TINYINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE
FLOAT | +| DECIMAL | DECIMAL
DOUBLE
FLOAT | +| DATE | DATE | +| DATETIME | TIMESTAMP | +| CHAR | STRING | +| VARCHAR | STRING | +| STRING | STRING | +| ARRAY | ARRAY | +| MAP | MAP | +| JSON | STRING | +| HLL | Not supported yet | +| BITMAP | Not supported yet | +| QUANTILE_STATE | Not supported yet | +| STRUCT | Not supported yet | + +#### Supported import data formats + +The supported formats include CSV and JSON + +## Task Example + +### Simple: + +> The following example describes writing multiple data types to SelectDBCloud, and users need to create corresponding tables downstream + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" + checkpoint.interval = 10000 } -``` -JSON Write: +source { + FakeSource { + row.num = 10 + map.size = 10 + array.size = 10 + bytes.length = 10 + string.length = 10 + schema = { + fields { + c_map = "map<string, array<int>>" + c_array = "array<int>" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_decimal = "decimal(16, 1)" + c_null = "null" + c_bytes = bytes + c_date = date + c_timestamp = timestamp + } + } + } +} -``` -selectdb.config { - file.type="json" +sink { + SelectDBCloud { + load-url = "warehouse_ip:http_port" + jdbc-url = "warehouse_ip:mysql_port" + cluster-name = "Cluster" + table.identifier = "test.test" + username = "admin" + password = "******" + selectdb.config { + file.type = "json" + } + } } ``` -### sink.buffer-size [string] - -The maximum capacity of the cache, in bytes, that is flushed to the object storage. The default is 10MB. it is not recommended to modify it. - -### sink.buffer-count [string] - -Maximum number of entries flushed to the object store. The default value is 10000. it is not recommended to modify. - -### sink.max-retries [string] - -The maximum number of retries in the Commit phase, the default is 3.
- -## Example - -Use JSON format to import data +### Use JSON format to import data ``` sink { SelectDBCloud { - load-url="warehouse_ip:http_port" - jdbc-url="warehouse_ip:mysql_port" - cluster-name="Cluster" - table.identifier="test.test" - username="admin" - password="******" + load-url = "warehouse_ip:http_port" + jdbc-url = "warehouse_ip:mysql_port" + cluster-name = "Cluster" + table.identifier = "test.test" + username = "admin" + password = "******" selectdb.config { - file.type="json" + file.type = "json" } } } + ``` -Use CSV format to import data +### Use CSV format to import data ``` sink { SelectDBCloud { - load-url="warehouse_ip:http_port" - jdbc-url="warehouse_ip:mysql_port" - cluster-name="Cluster" - table.identifier="test.test" - username="admin" - password="******" + load-url = "warehouse_ip:http_port" + jdbc-url = "warehouse_ip:mysql_port" + cluster-name = "Cluster" + table.identifier = "test.test" + username = "admin" + password = "******" selectdb.config { - file.type="csv" - file.column_separator="," - file.line_delimiter="\n" + file.type = "csv" + file.column_separator = "," + file.line_delimiter = "\n" } } } diff --git a/docs/en/connector-v2/sink/Slack.md b/docs/en/connector-v2/sink/Slack.md index 27ba01c32b0f..7ed87d2022c3 100644 --- a/docs/en/connector-v2/sink/Slack.md +++ b/docs/en/connector-v2/sink/Slack.md @@ -2,42 +2,39 @@ > Slack sink connector -## Description - -Used to send data to Slack Channel. Both support streaming and batch mode. +## Support Those Engines -> For example, if the data from upstream is [`age: 12, name: huan`], the content send to socket server is the following: `{"name":"huan","age":17}` +> Spark
+> Flink
+> SeaTunnel Zeta
## Key features - [ ] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [cdc](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|----------------|--------|----------|---------------| -| webhooks_url | String | Yes | - | -| oauth_token | String | Yes | - | -| slack_channel | String | Yes | - | -| common-options | | no | - | - -### webhooks_url [string] +## Description -Slack webhook url +Used to send data to Slack Channel. Both support streaming and batch mode. -### oauth_token [string] +> For example, if the data from upstream is [`age: 12, name: huan`], the content send to socket server is the following: `{"name":"huan","age":17}` -Slack oauth token used for the actual authentication +## Data Type Mapping -### slack_channel [string] +All data types are mapped to string. -slack channel for data write +## Options -### common options +| Name | Type | Required | Default | Description | +|----------------|--------|----------|---------|-----------------------------------------------------------------------------------------------------| +| webhooks_url | String | Yes | - | Slack webhook url | +| oauth_token | String | Yes | - | Slack oauth token used for the actual authentication | +| slack_channel | String | Yes | - | slack channel for data write | +| common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | -Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details +## Task Example -## Example +### Simple: ```hocon sink { diff --git a/docs/en/connector-v2/sink/Snowflake.md b/docs/en/connector-v2/sink/Snowflake.md index 21bfb175ef7e..1dfff5e09c74 100644 --- a/docs/en/connector-v2/sink/Snowflake.md +++ b/docs/en/connector-v2/sink/Snowflake.md @@ -61,8 +61,7 @@ Write data through jdbc. 
Support Batch mode and Streaming mode, support concurre | support_upsert_by_query_primary_key_exist | Boolean | No | false | Choose to use INSERT sql, UPDATE sql to process update events(INSERT, UPDATE_AFTER) based on query primary key exists. This configuration is only used when database unsupport upsert syntax. **Note**: that this method has low performance | | connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. | | max_retries | Int | No | 0 | The number of retries to submit failed (executeBatch) | -| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `batch_interval_ms`
, the data will be flushed into the database | -| batch_interval_ms | Int | No | 1000 | For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the database | +| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `checkpoint.interval`
, the data will be flushed into the database | | max_commit_attempts | Int | No | 3 | The number of retries for transaction commit failures | | transaction_timeout_sec | Int | No | -1 | The timeout after the transaction is opened, the default is -1 (never timeout). Note that setting the timeout may affect
exactly-once semantics | | auto_commit | Boolean | No | true | Automatic transaction commit is enabled by default | diff --git a/docs/en/connector-v2/sink/StarRocks.md b/docs/en/connector-v2/sink/StarRocks.md index 7c6491fb591e..38893a429ef7 100644 --- a/docs/en/connector-v2/sink/StarRocks.md +++ b/docs/en/connector-v2/sink/StarRocks.md @@ -2,94 +2,43 @@ > StarRocks sink connector -## Description +## Support These Engines -Used to send data to StarRocks. Both support streaming and batch mode. -The internal implementation of StarRocks sink connector is cached and imported by stream load in batches. +> Spark
+> Flink
+> SeaTunnel Zeta
-## Key features +## Key Features - [ ] [exactly-once](../../concept/connector-v2-features.md) - [x] [cdc](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|-----------------------------|---------|----------|-----------------| -| nodeUrls | list | yes | - | -| base-url | string | yes | - | -| username | string | yes | - | -| password | string | yes | - | -| database | string | yes | - | -| table | string | no | - | -| labelPrefix | string | no | - | -| batch_max_rows | long | no | 1024 | -| batch_max_bytes | int | no | 5 * 1024 * 1024 | -| batch_interval_ms | int | no | - | -| max_retries | int | no | - | -| retry_backoff_multiplier_ms | int | no | - | -| max_retry_backoff_ms | int | no | - | -| enable_upsert_delete | boolean | no | false | -| save_mode_create_template | string | no | see below | -| starrocks.config | map | no | - | - -### nodeUrls [list] - -`StarRocks` cluster address, the format is `["fe_ip:fe_http_port", ...]` - -### base-url [string] - -The JDBC URL like `jdbc:mysql://localhost:9030/` or `jdbc:mysql://localhost:9030` or `jdbc:mysql://localhost:9030/db` - -### username [string] - -`StarRocks` user username - -### password [string] - -`StarRocks` user password - -### database [string] - -The name of StarRocks database - -### table [string] - -The name of StarRocks table, If not set, the table name will be the name of the upstream table - -### labelPrefix [string] - -The prefix of StarRocks stream load label - -### batch_max_rows [long] - -For batch writing, when the number of buffers reaches the number of `batch_max_rows` or the byte size of `batch_max_bytes` or the time reaches `batch_interval_ms`, the data will be flushed into the StarRocks - -### batch_max_bytes [int] - -For batch writing, when the number of buffers reaches the number of `batch_max_rows` or the byte size of `batch_max_bytes` or the time reaches `batch_interval_ms`, the data will be flushed into the StarRocks - -### 
batch_interval_ms [int] - -For batch writing, when the number of buffers reaches the number of `batch_max_rows` or the byte size of `batch_max_bytes` or the time reaches `batch_interval_ms`, the data will be flushed into the StarRocks - -### max_retries [int] - -The number of retries to flush failed - -### retry_backoff_multiplier_ms [int] - -Using as a multiplier for generating the next delay for backoff - -### max_retry_backoff_ms [int] - -The amount of time to wait before attempting to retry a request to `StarRocks` - -### enable_upsert_delete [boolean] +## Description -Whether to enable upsert/delete, only supports PrimaryKey model. +Used to send data to StarRocks. Both support streaming and batch mode. +The internal implementation of StarRocks sink connector is cached and imported by stream load in batches. -### save_mode_create_template [string] +## Sink Options + +| Name | Type | Required | Default | Description | +|-----------------------------|---------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| nodeUrls | list | yes | - | `StarRocks` cluster address, the format is `["fe_ip:fe_http_port", ...]` | +| base-url | string | yes | - | The JDBC URL like `jdbc:mysql://localhost:9030/` or `jdbc:mysql://localhost:9030` or `jdbc:mysql://localhost:9030/db` | +| username | string | yes | - | `StarRocks` user username | +| password | string | yes | - | `StarRocks` user password | +| database | string | yes | - | The name of StarRocks database | +| table | string | no | - | The name of StarRocks table, If not set, the table name will be the name of the upstream table | +| labelPrefix | string | no | - | The prefix of StarRocks stream load label | +| batch_max_rows | long | no | 1024 | For batch writing, when the number of buffers reaches the number of `batch_max_rows` or 
the byte size of `batch_max_bytes` or the time reaches `checkpoint.interval`, the data will be flushed into the StarRocks | +| batch_max_bytes | int | no | 5 * 1024 * 1024 | For batch writing, when the number of buffers reaches the number of `batch_max_rows` or the byte size of `batch_max_bytes` or the time reaches `checkpoint.interval`, the data will be flushed into the StarRocks | +| max_retries | int | no | - | The number of retries to flush failed | +| retry_backoff_multiplier_ms | int | no | - | Using as a multiplier for generating the next delay for backoff | +| max_retry_backoff_ms | int | no | - | The amount of time to wait before attempting to retry a request to `StarRocks` | +| enable_upsert_delete | boolean | no | false | Whether to enable upsert/delete, only supports PrimaryKey model. | +| save_mode_create_template | string | no | see below | see below | +| starrocks.config | map | no | - | The parameter of the stream load `data_desc` | + +### save_mode_create_template We use templates to automatically create starrocks tables, which will create corresponding table creation statements based on the type of upstream data and schema type, @@ -131,19 +80,72 @@ You can use the following placeholders description of StarRocks - rowtype_primary_key: Used to get the primary key in the upstream schema (maybe a list) -### starrocks.config [map] - -The parameter of the stream load `data_desc` +## Data Type Mapping + +| StarRocks Data type | SeaTunnel Data type | +|---------------------|---------------------| +| BOOLEAN | BOOLEAN | +| TINYINT | TINYINT | +| SMALLINT | SMALLINT | +| INT | INT | +| BIGINT | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| DECIMAL | DECIMAL | +| DATE | STRING | +| TIME | STRING | +| DATETIME | STRING | +| STRING | STRING | +| ARRAY | STRING | +| MAP | STRING | +| BYTES | STRING | #### Supported import data formats -The supported formats include CSV and JSON. 
Default value: JSON +The supported formats include CSV and JSON -## Example +## Task Example -Use JSON format to import data +### Simple: + +> The following example describes writing multiple data types to StarRocks, and users need to create corresponding tables downstream ```hocon +env { + parallelism = 1 + job.mode = "BATCH" + checkpoint.interval = 10000 +} + +source { + FakeSource { + row.num = 10 + map.size = 10 + array.size = 10 + bytes.length = 10 + string.length = 10 + schema = { + fields { + c_map = "map>" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_decimal = "decimal(16, 1)" + c_null = "null" + c_bytes = bytes + c_date = date + c_timestamp = timestamp + } + } + } +} + sink { StarRocks { nodeUrls = ["e2e_starRocksdb:8030"] @@ -158,12 +160,29 @@ sink { } } } - ``` -Use CSV format to import data +### Support write cdc changelog event(INSERT/UPDATE/DELETE) ```hocon +sink { + StarRocks { + nodeUrls = ["e2e_starRocksdb:8030"] + username = root + password = "" + database = "test" + table = "e2e_table_sink" + ... + + // Support upsert/delete event synchronization (enable_upsert_delete=true), only supports PrimaryKey model. + enable_upsert_delete = true + } +} +``` + +### Use JSON format to import data + +``` sink { StarRocks { nodeUrls = ["e2e_starRocksdb:8030"] @@ -173,17 +192,17 @@ sink { table = "e2e_table_sink" batch_max_rows = 10 starrocks.config = { - format = "CSV" - column_separator = "\\x01" - row_delimiter = "\\x02" + format = "JSON" + strip_outer_array = true } } } + ``` -Support write cdc changelog event(INSERT/UPDATE/DELETE) +### Use CSV format to import data -```hocon +``` sink { StarRocks { nodeUrls = ["e2e_starRocksdb:8030"] @@ -191,10 +210,12 @@ sink { password = "" database = "test" table = "e2e_table_sink" - ... 
- - // Support upsert/delete event synchronization (enable_upsert_delete=true), only supports PrimaryKey model. - enable_upsert_delete = true + batch_max_rows = 10 + starrocks.config = { + format = "CSV" + column_separator = "\\x01" + row_delimiter = "\\x02" + } } } ``` diff --git a/docs/en/connector-v2/sink/Tablestore.md b/docs/en/connector-v2/sink/Tablestore.md index ed59895c65f1..8f161ad25f6e 100644 --- a/docs/en/connector-v2/sink/Tablestore.md +++ b/docs/en/connector-v2/sink/Tablestore.md @@ -21,7 +21,6 @@ Write data to `Tablestore` | table | string | yes | - | | primary_keys | array | yes | - | | batch_size | string | no | 25 | -| batch_interval_ms | string | no | 1000 | | common-options | config | no | - | ### end_point [string] diff --git a/docs/en/connector-v2/sink/Vertica.md b/docs/en/connector-v2/sink/Vertica.md index 0db8571d55f2..9a6244076828 100644 --- a/docs/en/connector-v2/sink/Vertica.md +++ b/docs/en/connector-v2/sink/Vertica.md @@ -67,8 +67,7 @@ semantics (using XA transaction guarantee). | support_upsert_by_query_primary_key_exist | Boolean | No | false | Choose to use INSERT sql, UPDATE sql to process update events(INSERT, UPDATE_AFTER) based on query primary key exists. This configuration is only used when database unsupport upsert syntax. **Note**: that this method has low performance | | connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. | | max_retries | Int | No | 0 | The number of retries to submit failed (executeBatch) | -| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `batch_interval_ms`
, the data will be flushed into the database | -| batch_interval_ms | Int | No | 1000 | For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the database | +| batch_size | Int | No | 1000 | For batch writing, when the number of buffered records reaches the number of `batch_size` or the time reaches `checkpoint.interval`
, the data will be flushed into the database | | is_exactly_once | Boolean | No | false | Whether to enable exactly-once semantics, which will use Xa transactions. If on, you need to
set `xa_data_source_class_name`. | | generate_sink_sql | Boolean | No | false | Generate sql statements based on the database table you want to write to | | xa_data_source_class_name | String | No | - | The xa data source class name of the database Driver, for example, vertical is `com.vertical.cj.jdbc.VerticalXADataSource`, and
please refer to appendix for other data sources | diff --git a/docs/en/connector-v2/source/Clickhouse.md b/docs/en/connector-v2/source/Clickhouse.md index 7596bf72a8f0..d70a8f0e33fb 100644 --- a/docs/en/connector-v2/source/Clickhouse.md +++ b/docs/en/connector-v2/source/Clickhouse.md @@ -66,7 +66,7 @@ The following example demonstrates how to create a data synchronization job that ```bash # Set the basic configuration of the task to be performed env { - execution.parallelism = 1 + execution.parallelism = 10 job.mode = "BATCH" } diff --git a/docs/en/connector-v2/source/DB2.md b/docs/en/connector-v2/source/DB2.md index 7ea91b7165c7..c9eb6a578b6a 100644 --- a/docs/en/connector-v2/source/DB2.md +++ b/docs/en/connector-v2/source/DB2.md @@ -54,20 +54,20 @@ Read external data source data through JDBC. ## Source Options -| Name | Type | Required | Default | Description | -|------------------------------|--------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:db2://127.0.0.1:50000/dbname | -| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use db2 the value is `com.ibm.db2.jdbc.app.DB2Driver`. | -| user | String | No | - | Connection instance user name | -| password | String | No | - | Connection instance password | -| query | String | Yes | - | Query statement | -| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | -| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | -| partition_lower_bound | Long | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | -| partition_upper_bound | Long | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | -| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | -| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
the row fetch size used in the query toimprove performance by
reducing the number database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | -| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | +| Name | Type | Required | Default | Description | +|------------------------------|------------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:db2://127.0.0.1:50000/dbname | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use db2 the value is `com.ibm.db2.jdbc.app.DB2Driver`. | +| user | String | No | - | Connection instance user name | +| password | String | No | - | Connection instance password | +| query | String | Yes | - | Query statement | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | +| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | +| partition_lower_bound | BigDecimal | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | +| partition_upper_bound | BigDecimal | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | +| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | +| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
the row fetch size used in the query to improve performance by
reducing the number of database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | ### Tips diff --git a/docs/en/connector-v2/source/FtpFile.md b/docs/en/connector-v2/source/FtpFile.md index c692a7483a6d..c9fb8e70cdb8 100644 --- a/docs/en/connector-v2/source/FtpFile.md +++ b/docs/en/connector-v2/source/FtpFile.md @@ -58,9 +58,9 @@ The target ftp host is required The target ftp port is required -### username [string] +### user [string] -The target ftp username is required +The target ftp user name is required ### password [string] diff --git a/docs/en/connector-v2/source/Github.md b/docs/en/connector-v2/source/Github.md index 5cc6beea76b9..900a207e6971 100644 --- a/docs/en/connector-v2/source/Github.md +++ b/docs/en/connector-v2/source/Github.md @@ -28,7 +28,7 @@ Used to read data from Github. | body | String | No | - | | json_field | Config | No | - | | content_json | String | No | - | -| poll_interval_ms | int | No | - | +| poll_interval_millis | int | No | - | | retry | int | No | - | | retry_backoff_multiplier_ms | int | No | 100 | | retry_backoff_max_ms | int | No | 10000 | @@ -55,7 +55,7 @@ http params http body -### poll_interval_ms [int] +### poll_interval_millis [int] request http api interval(millis) in stream mode diff --git a/docs/en/connector-v2/source/Gitlab.md b/docs/en/connector-v2/source/Gitlab.md index b2c17c9f2465..ff3b6bc6423b 100644 --- a/docs/en/connector-v2/source/Gitlab.md +++ b/docs/en/connector-v2/source/Gitlab.md @@ -28,7 +28,7 @@ Used to read data from Gitlab. 
| body | String | No | - | | json_field | Config | No | - | | content_json | String | No | - | -| poll_interval_ms | int | No | - | +| poll_interval_millis | int | No | - | | retry | int | No | - | | retry_backoff_multiplier_ms | int | No | 100 | | retry_backoff_max_ms | int | No | 10000 | @@ -55,7 +55,7 @@ http params http body -### poll_interval_ms [int] +### poll_interval_millis [int] request http api interval(millis) in stream mode diff --git a/docs/en/connector-v2/source/HdfsFile.md b/docs/en/connector-v2/source/HdfsFile.md index f479e40a2bc2..88c1e35f87e3 100644 --- a/docs/en/connector-v2/source/HdfsFile.md +++ b/docs/en/connector-v2/source/HdfsFile.md @@ -1,20 +1,14 @@ # HdfsFile -> Hdfs file source connector +> Hdfs File Source Connector -## Description - -Read data from hdfs file system. - -:::tip +## Support Those Engines -If you use spark/flink, In order to use this connector, You must ensure your spark/flink cluster already integrated hadoop. The tested hadoop version is 2.x. +> Spark
+> Flink
+> SeaTunnel Zeta
-If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you download and install SeaTunnel Engine. You can check the jar package under ${SEATUNNEL_HOME}/lib to confirm this. - -::: - -## Key features +## Key Features - [x] [batch](../../concept/connector-v2-features.md) - [ ] [stream](../../concept/connector-v2-features.md) @@ -33,238 +27,57 @@ Read all the data in a split in a pollNext call. What splits are read will be sa - [x] json - [x] excel -## Options - -| name | type | required | default value | -|---------------------------|---------|----------|---------------------| -| path | string | yes | - | -| file_format_type | string | yes | - | -| fs.defaultFS | string | yes | - | -| read_columns | list | yes | - | -| hdfs_site_path | string | no | - | -| delimiter | string | no | \001 | -| parse_partition_from_path | boolean | no | true | -| date_format | string | no | yyyy-MM-dd | -| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | -| time_format | string | no | HH:mm:ss | -| kerberos_principal | string | no | - | -| kerberos_keytab_path | string | no | - | -| skip_header_row_number | long | no | 0 | -| schema | config | no | - | -| common-options | | no | - | -| sheet_name | string | no | - | -| file_filter_pattern | string | no | - | - -### path [string] - -The source file path. 
- -### delimiter [string] - -Field delimiter, used to tell connector how to slice and dice fields when reading text files - -default `\001`, the same as hive's default delimiter - -### parse_partition_from_path [boolean] - -Control whether parse the partition keys and values from file path - -For example if you read a file from path `hdfs://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26` - -Every record data from file will be added these two fields: - -| name | age | -|---------------|-----| -| tyrantlucifer | 26 | - -Tips: **Do not define partition fields in schema option** - -### date_format [string] - -Date type format, used to tell connector how to convert string to date, supported as the following formats: - -`yyyy-MM-dd` `yyyy.MM.dd` `yyyy/MM/dd` - -default `yyyy-MM-dd` - -### datetime_format [string] - -Datetime type format, used to tell connector how to convert string to datetime, supported as the following formats: - -`yyyy-MM-dd HH:mm:ss` `yyyy.MM.dd HH:mm:ss` `yyyy/MM/dd HH:mm:ss` `yyyyMMddHHmmss` - -default `yyyy-MM-dd HH:mm:ss` - -### time_format [string] - -Time type format, used to tell connector how to convert string to time, supported as the following formats: - -`HH:mm:ss` `HH:mm:ss.SSS` - -default `HH:mm:ss` - -### skip_header_row_number [long] - -Skip the first few lines, but only for the txt and csv. - -For example, set like following: - -`skip_header_row_number = 2` - -then SeaTunnel will skip the first 2 lines from source files - -### file_format_type [string] - -File type, supported as the following file types: - -`text` `csv` `parquet` `orc` `json` `excel` - -If you assign file type to `json`, you should also assign schema option to tell connector how to parse data to the row you want. 
- -For example: - -upstream data is the following: - -```json - -{"code": 200, "data": "get success", "success": true} - -``` - -You can also save multiple pieces of data in one file and split them by newline: - -```json lines - -{"code": 200, "data": "get success", "success": true} -{"code": 300, "data": "get failed", "success": false} - -``` - -you should assign schema as the following: - -```hocon - -schema { - fields { - code = int - data = string - success = boolean - } -} - -``` - -connector will generate data as the following: - -| code | data | success | -|------|-------------|---------| -| 200 | get success | true | - -If you assign file type to `parquet` `orc`, schema option not required, connector can find the schema of upstream data automatically. - -If you assign file type to `text` `csv`, you can choose to specify the schema information or not. +## Description -For example, upstream data is the following: +Read data from hdfs file system. -```text +## Supported DataSource Info -tyrantlucifer#26#male +| Datasource | Supported Versions | +|------------|--------------------| +| HdfsFile | hadoop 2.x and 3.x | -``` +## Source Options -If you do not assign data schema connector will treat the upstream data as the following: +| Name | Type | Required | Default | Description | +|---------------------------|---------|----------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| path | string | yes | - | The source file path. 
| +| file_format_type | string | yes | - | We supported as the following file types:`text` `json` `csv` `orc` `parquet` `excel`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. | +| fs.defaultFS | string | yes | - | The hadoop cluster address that start with `hdfs://`, for example: `hdfs://hadoopcluster` | +| read_columns | list | yes | - | The read column list of the data source, user can use it to implement field projection.The file type supported column projection as the following shown:[text,json,csv,orc,parquet,excel].Tips: If the user wants to use this feature when reading `text` `json` `csv` files, the schema option must be configured. | +| hdfs_site_path | string | no | - | The path of `hdfs-site.xml`, used to load ha configuration of namenodes | +| delimiter | string | no | \001 | Field delimiter, used to tell connector how to slice and dice fields when reading text files. default `\001`, the same as hive's default delimiter | +| parse_partition_from_path | boolean | no | true | Control whether parse the partition keys and values from file path. For example if you read a file from path `hdfs://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26`. Every record data from file will be added these two fields:[name:tyrantlucifer,age:26].Tips:Do not define partition fields in schema option. 
| +| date_format | string | no | yyyy-MM-dd | Date type format, used to tell connector how to convert string to date, supported as the following formats:`yyyy-MM-dd` `yyyy.MM.dd` `yyyy/MM/dd` default `yyyy-MM-dd`.Date type format, used to tell connector how to convert string to date, supported as the following formats:`yyyy-MM-dd` `yyyy.MM.dd` `yyyy/MM/dd` default `yyyy-MM-dd` | +| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | Datetime type format, used to tell connector how to convert string to datetime, supported as the following formats:`yyyy-MM-dd HH:mm:ss` `yyyy.MM.dd HH:mm:ss` `yyyy/MM/dd HH:mm:ss` `yyyyMMddHHmmss` .default `yyyy-MM-dd HH:mm:ss` | +| time_format | string | no | HH:mm:ss | Time type format, used to tell connector how to convert string to time, supported as the following formats:`HH:mm:ss` `HH:mm:ss.SSS`.default `HH:mm:ss` | +| kerberos_principal | string | no | - | The principal of kerberos | +| kerberos_keytab_path | string | no | - | The keytab path of kerberos | +| skip_header_row_number | long | no | 0 | Skip the first few lines, but only for the txt and csv.For example, set like following:`skip_header_row_number = 2`.then Seatunnel will skip the first 2 lines from source files | +| schema | config | no | - | the schema fields of upstream data | +| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. | +| sheet_name | string | no | - | Reader the sheet of the workbook,Only used when file_format is excel. | -| content | -|-----------------------| -| tyrantlucifer#26#male | +### Tips -If you assign data schema, you should also assign the option `delimiter` too except CSV file type +> If you use spark/flink, In order to use this connector, You must ensure your spark/flink cluster already integrated hadoop. The tested hadoop version is 2.x. 
If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you download and install SeaTunnel Engine. You can check the jar package under ${SEATUNNEL_HOME}/lib to confirm this. -you should assign schema and delimiter as the following: +## Task Example -```hocon +### Simple: -delimiter = "#" -schema { - fields { - name = string - age = int - gender = string - } -} +> This example defines a SeaTunnel synchronization task that reads data from Hdfs and sends it to Hdfs. ``` - -connector will generate data as the following: - -| name | age | gender | -|---------------|-----|--------| -| tyrantlucifer | 26 | male | - -### fs.defaultFS [string] - -Hdfs cluster address. - -### hdfs_site_path [string] - -The path of `hdfs-site.xml`, used to load ha configuration of namenodes - -### kerberos_principal [string] - -The principal of kerberos - -### kerberos_keytab_path [string] - -The keytab path of kerberos - -### schema [Config] - -#### fields [Config] - -the schema fields of upstream data - -### read_columns [list] - -The read column list of the data source, user can use it to implement field projection. - -The file type supported column projection as the following shown: - -- text -- json -- csv -- orc -- parquet -- excel - -**Tips: If the user wants to use this feature when reading `text` `json` `csv` files, the schema option must be configured** - -### common options - -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. - -### sheet_name [string] - -Reader the sheet of the workbook,Only used when file_format_type is excel. - -### file_filter_pattern [string] - -Filter pattern, which used for filtering files. 
- -## Example - -```hocon - -HdfsFile { - path = "/apps/hive/demo/student" - file_format_type = "parquet" - fs.defaultFS = "hdfs://namenode001" +# Defining the runtime environment +env { + # You can set flink configuration here + execution.parallelism = 1 + job.mode = "BATCH" } -``` - -```hocon - -HdfsFile { +source { + HdfsFile { schema { fields { name = string @@ -274,24 +87,24 @@ HdfsFile { path = "/apps/hive/demo/student" type = "json" fs.defaultFS = "hdfs://namenode001" + } + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/category/source-v2 } -``` - -## Changelog - -### 2.2.0-beta 2022-09-26 - -- Add HDFS File Source Connector - -### 2.3.0-beta 2022-10-20 - -- [BugFix] Fix the bug of incorrect path in windows environment ([2980](https://github.com/apache/seatunnel/pull/2980)) -- [Improve] Support extract partition from SeaTunnelRow fields ([3085](https://github.com/apache/seatunnel/pull/3085)) -- [Improve] Support parse field from file path ([2985](https://github.com/apache/seatunnel/pull/2985)) - -### next version +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/category/transform-v2 +} -- [Improve] Support skip header for csv and txt files ([3900](https://github.com/apache/seatunnel/pull/3840)) -- [Improve] Support kerberos authentication ([3840](https://github.com/apache/seatunnel/pull/3840)) +sink { + HdfsFile { + fs.defaultFS = "hdfs://hadoopcluster" + path = "/tmp/hive/warehouse/test2" + file_format = "orc" + } + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/category/sink-v2 +} +``` diff --git a/docs/en/connector-v2/source/Hive.md b/docs/en/connector-v2/source/Hive.md index 
f9f35aaf733f..afa9893d5b2b 100644 --- a/docs/en/connector-v2/source/Hive.md +++ b/docs/en/connector-v2/source/Hive.md @@ -33,17 +33,18 @@ Read all the data in a split in a pollNext call. What splits are read will be sa ## Options -| name | type | required | default value | -|----------------------|--------|----------|---------------| -| table_name | string | yes | - | -| metastore_uri | string | yes | - | -| kerberos_principal | string | no | - | -| kerberos_keytab_path | string | no | - | -| hdfs_site_path | string | no | - | -| hive_site_path | string | no | - | -| read_partitions | list | no | - | -| read_columns | list | no | - | -| common-options | | no | - | +| name | type | required | default value | +|-------------------------------|---------|----------|---------------| +| table_name | string | yes | - | +| metastore_uri | string | yes | - | +| kerberos_principal | string | no | - | +| kerberos_keytab_path | string | no | - | +| hdfs_site_path | string | no | - | +| hive_site_path | string | no | - | +| read_partitions | list | no | - | +| read_columns | list | no | - | +| abort_drop_partition_metadata | boolean | no | true | +| common-options | | no | - | ### table_name [string] @@ -80,6 +81,10 @@ The keytab file path of kerberos authentication The read column list of the data source, user can use it to implement field projection. +### abort_drop_partition_metadata [boolean] + +Flag to decide whether to drop partition metadata from Hive Metastore during an abort operation. Note: this only affects the metadata in the metastore, the data in the partition will always be deleted (data generated during the synchronization process). 
+ ### common options Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details diff --git a/docs/en/connector-v2/source/Http.md b/docs/en/connector-v2/source/Http.md index 291835b93ed6..f3e6a221bb03 100644 --- a/docs/en/connector-v2/source/Http.md +++ b/docs/en/connector-v2/source/Http.md @@ -52,9 +52,9 @@ They can be downloaded via install-plugin.sh or from the Maven central repositor | format | String | No | json | The format of upstream data, now only support `json` `text`, default `json`. | | method | String | No | get | Http request method, only supports GET, POST method. | | headers | Map | No | - | Http headers. | -| params | Map | No | - | Http params. | -| body | String | No | - | Http body. | -| poll_interval_ms | Int | No | - | Request http api interval(millis) in stream mode. | +| params | Map | No | - | Http params,the program will automatically add http header application/x-www-form-urlencoded. | +| body | String | No | - | Http body,the program will automatically add http header application/json,body is jsonbody. | +| poll_interval_millis | Int | No | - | Request http api interval(millis) in stream mode. | | retry | Int | No | - | The max retry times if request http return to `IOException`. | | retry_backoff_multiplier_ms | Int | No | 100 | The retry-backoff times(millis) multiplier if request http failed. | | retry_backoff_max_ms | Int | No | 10000 | The maximum retry-backoff times(millis) if request http failed | diff --git a/docs/en/connector-v2/source/Iceberg.md b/docs/en/connector-v2/source/Iceberg.md index 6a42ee0ddd30..b6d3924b95f1 100644 --- a/docs/en/connector-v2/source/Iceberg.md +++ b/docs/en/connector-v2/source/Iceberg.md @@ -2,9 +2,15 @@ > Apache Iceberg source connector -## Description +## Support Iceberg Version -Source connector for Apache Iceberg. It can support batch and stream mode. +- 0.14.0 + +## Support Those Engines + +> Spark
+> Flink
+> SeaTunnel Zeta
## Key features @@ -22,126 +28,120 @@ Source connector for Apache Iceberg. It can support batch and stream mode. - [x] hadoop(2.7.1 , 2.7.5 , 3.1.3) - [x] hive(2.3.9 , 3.1.2) -## Options - -| name | type | required | default value | -|--------------------------|---------|----------|----------------------| -| catalog_name | string | yes | - | -| catalog_type | string | yes | - | -| uri | string | no | - | -| warehouse | string | yes | - | -| namespace | string | yes | - | -| table | string | yes | - | -| schema | config | no | - | -| case_sensitive | boolean | no | false | -| start_snapshot_timestamp | long | no | - | -| start_snapshot_id | long | no | - | -| end_snapshot_id | long | no | - | -| use_snapshot_id | long | no | - | -| use_snapshot_timestamp | long | no | - | -| stream_scan_strategy | enum | no | FROM_LATEST_SNAPSHOT | -| common-options | | no | - | - -### catalog_name [string] - -User-specified catalog name. - -### catalog_type [string] - -The optional values are: -- hive: The hive metastore catalog. -- hadoop: The hadoop catalog. - -### uri [string] - -The Hive metastore’s thrift URI. - -### warehouse [string] - -The location to store metadata files and data files. - -### namespace [string] - -The iceberg database name in the backend catalog. - -### table [string] - -The iceberg table name in the backend catalog. - -### case_sensitive [boolean] +## Description -If data columns where selected via schema [config], controls whether the match to the schema will be done with case sensitivity. +Source connector for Apache Iceberg. It can support batch and stream mode. 
-### schema [config] +## Supported DataSource Info -#### fields [Config] +| Datasource | Dependent | Maven | +|------------|---------------------|---------------------------------------------------------------------------| +| Iceberg | flink-shaded-hadoop | [Download](https://mvnrepository.com/search?q=flink-shaded-hadoop-) | +| Iceberg | hive-exec | [Download](https://mvnrepository.com/artifact/org.apache.hive/hive-exec) | +| Iceberg | libfb303 | [Download](https://mvnrepository.com/artifact/org.apache.thrift/libfb303) | -Use projection to select data columns and columns order. +## Database Dependency -e.g. +> In order to be compatible with different versions of Hadoop and Hive, the scope of hive-exec and flink-shaded-hadoop-2 in the project pom file are provided, so if you use the Flink engine, first you may need to add the following Jar packages to /lib directory, if you are using the Spark engine and integrated with Hadoop, then you do not need to add the following Jar packages. ``` -schema { - fields { - f2 = "boolean" - f1 = "bigint" - f3 = "int" - f4 = "bigint" - } -} +flink-shaded-hadoop-x-xxx.jar +hive-exec-xxx.jar +libfb303-xxx.jar ``` -### start_snapshot_id [long] - -Instructs this scan to look for changes starting from a particular snapshot (exclusive). - -### start_snapshot_timestamp [long] - -Instructs this scan to look for changes starting from the most recent snapshot for the table as of the timestamp. timestamp – the timestamp in millis since the Unix epoch - -### end_snapshot_id [long] - -Instructs this scan to look for changes up to a particular snapshot (inclusive). - -### use_snapshot_id [long] - -Instructs this scan to look for use the given snapshot ID. - -### use_snapshot_timestamp [long] - -Instructs this scan to look for use the most recent snapshot as of the given time in milliseconds. 
timestamp – the timestamp in millis since the Unix epoch - -### stream_scan_strategy [enum] - -Starting strategy for stream mode execution, Default to use `FROM_LATEST_SNAPSHOT` if don’t specify any value. -The optional values are: -- TABLE_SCAN_THEN_INCREMENTAL: Do a regular table scan then switch to the incremental mode. -- FROM_LATEST_SNAPSHOT: Start incremental mode from the latest snapshot inclusive. -- FROM_EARLIEST_SNAPSHOT: Start incremental mode from the earliest snapshot inclusive. -- FROM_SNAPSHOT_ID: Start incremental mode from a snapshot with a specific id inclusive. -- FROM_SNAPSHOT_TIMESTAMP: Start incremental mode from a snapshot with a specific timestamp inclusive. - -### common options - -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. - -## Example - -simple +> Some versions of the hive-exec package do not have libfb303-xxx.jar, so you also need to manually import the Jar package. + +## Data Type Mapping + +| Iceberg Data type | SeaTunnel Data type | +|-------------------|---------------------| +| BOOLEAN | BOOLEAN | +| INTEGER | INT | +| LONG | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| DATE | DATE | +| TIME | TIME | +| TIMESTAMP | TIMESTAMP | +| STRING | STRING | +| FIXED
BINARY | BYTES | +| DECIMAL | DECIMAL | +| STRUCT | ROW | +| LIST | ARRAY | +| MAP | MAP | + +## Source Options + +| Name | Type | Required | Default | Description | +|--------------------------|---------|----------|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| catalog_name | string | yes | - | User-specified catalog name. | +| catalog_type | string | yes | - | The optional values are: hive(The hive metastore catalog),hadoop(The hadoop catalog) | +| uri | string | no | - | The Hive metastore’s thrift URI. | +| warehouse | string | yes | - | The location to store metadata files and data files. | +| namespace | string | yes | - | The iceberg database name in the backend catalog. | +| table | string | yes | - | The iceberg table name in the backend catalog. | +| schema | config | no | - | Use projection to select data columns and columns order. | +| case_sensitive | boolean | no | false | If data columns where selected via schema [config], controls whether the match to the schema will be done with case sensitivity. | +| start_snapshot_timestamp | long | no | - | Instructs this scan to look for changes starting from the most recent snapshot for the table as of the timestamp.
timestamp – the timestamp in millis since the Unix epoch | +| start_snapshot_id | long | no | - | Instructs this scan to look for changes starting from a particular snapshot (exclusive). | +| end_snapshot_id | long | no | - | Instructs this scan to look for changes up to a particular snapshot (inclusive). | +| use_snapshot_id | long | no | - | Instructs this scan to use the given snapshot ID. | +| use_snapshot_timestamp | long | no | - | Instructs this scan to use the most recent snapshot as of the given time in milliseconds. timestamp – the timestamp in millis since the Unix epoch | +| stream_scan_strategy | enum | no | FROM_LATEST_SNAPSHOT | Starting strategy for stream mode execution. Defaults to `FROM_LATEST_SNAPSHOT` if no value is specified. The optional values are:
TABLE_SCAN_THEN_INCREMENTAL: Do a regular table scan then switch to the incremental mode.
FROM_LATEST_SNAPSHOT: Start incremental mode from the latest snapshot inclusive.
FROM_EARLIEST_SNAPSHOT: Start incremental mode from the earliest snapshot inclusive.
FROM_SNAPSHOT_ID: Start incremental mode from a snapshot with a specific id inclusive.
FROM_SNAPSHOT_TIMESTAMP: Start incremental mode from a snapshot with a specific timestamp inclusive. | +| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. | + +## Task Example + +### Simple: ```hocon +env { + execution.parallelism = 2 + job.mode = "BATCH" +} + source { Iceberg { + schema { + fields { + f2 = "boolean" + f1 = "bigint" + f3 = "int" + f4 = "bigint" + f5 = "float" + f6 = "double" + f7 = "date" + f9 = "timestamp" + f10 = "timestamp" + f11 = "string" + f12 = "bytes" + f13 = "bytes" + f14 = "decimal(19,9)" + f15 = "array" + f16 = "map" + } + } catalog_name = "seatunnel" catalog_type = "hadoop" - warehouse = "hdfs://your_cluster//tmp/seatunnel/iceberg/" - namespace = "your_iceberg_database" - table = "your_iceberg_table" + warehouse = "file:///tmp/seatunnel/iceberg/hadoop/" + namespace = "database1" + table = "source" + result_table_name = "iceberg" + } +} + +transform { +} + +sink { + Console { + source_table_name = "iceberg" } } ``` -Or +### Hive Catalog: ```hocon source { @@ -156,7 +156,7 @@ source { } ``` -column projection +### Column Projection: ```hocon source { @@ -179,20 +179,6 @@ source { } ``` -:::tip - -In order to be compatible with different versions of Hadoop and Hive, the scope of hive-exec and flink-shaded-hadoop-2 in the project pom file are provided, so if you use the Flink engine, first you may need to add the following Jar packages to /lib directory, if you are using the Spark engine and integrated with Hadoop, then you do not need to add the following Jar packages. - -::: - -``` -flink-shaded-hadoop-x-xxx.jar -hive-exec-xxx.jar -libfb303-xxx.jar -``` - -Some versions of the hive-exec package do not have libfb303-xxx.jar, so you also need to manually import the Jar package. 
- ## Changelog ### 2.2.0-beta 2022-09-26 diff --git a/docs/en/connector-v2/source/IoTDB.md b/docs/en/connector-v2/source/IoTDB.md index a20680ce638f..da0f198d3e1b 100644 --- a/docs/en/connector-v2/source/IoTDB.md +++ b/docs/en/connector-v2/source/IoTDB.md @@ -2,14 +2,16 @@ > IoTDB source connector -## Description +## Support Those Engines -Read external data source data through IoTDB. +> Spark
+> Flink
+> SeaTunnel Zeta
## Key features - [x] [batch](../../concept/connector-v2-features.md) -- [ ] [stream](../../concept/connector-v2-features.md) +- [x] [stream](../../concept/connector-v2-features.md) - [x] [exactly-once](../../concept/connector-v2-features.md) - [x] [column projection](../../concept/connector-v2-features.md) @@ -18,106 +20,53 @@ supports query SQL and can achieve projection effect. - [x] [parallelism](../../concept/connector-v2-features.md) - [ ] [support user-defined split](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|----------------------------|---------|----------|---------------| -| host | string | no | - | -| port | int | no | - | -| node_urls | string | no | - | -| username | string | yes | - | -| password | string | yes | - | -| sql | string | yes | - | -| schema | config | yes | - | -| fetch_size | int | no | - | -| lower_bound | long | no | - | -| upper_bound | long | no | - | -| num_partitions | int | no | - | -| thrift_default_buffer_size | int | no | - | -| enable_cache_leader | boolean | no | - | -| version | string | no | - | -| common-options | | no | - | - -### single node, you need to set host and port to connect to the remote data source. - -**host** [string] the host of the IoTDB when you select host of the IoTDB - -**port** [int] the port of the IoTDB when you select - -### multi node, you need to set node_urls to connect to the remote data source. - -**node_urls** [string] the node_urls of the IoTDB when you select - -e.g. - -```text -127.0.0.1:8080,127.0.0.2:8080 -``` - -### other parameters - -**sql** [string] -execute sql statement e.g. - -``` -select name,age from test -``` - -### schema [config] - -#### fields [Config] - -The schema of the IoTDB that you want to generate - -e.g. 
- -``` -schema { - fields { - name = string - age = int - } - } -``` - -### option parameters - -### fetch_size [int] - -the fetch_size of the IoTDB when you select - -### username [string] - -the username of the IoTDB when you select - -### password [string] - -the password of the IoTDB when you select - -### lower_bound [long] - -the lower_bound of the IoTDB when you select - -### upper_bound [long] - -the upper_bound of the IoTDB when you select - -### num_partitions [int] - -the num_partitions of the IoTDB when you select - -### thrift_default_buffer_size [int] - -the thrift_default_buffer_size of the IoTDB when you select - -### enable_cache_leader [boolean] - -enable_cache_leader of the IoTDB when you select +## Description -### version [string] +Read external data source data through IoTDB. -Version represents the SQL semantic version used by the client, which is used to be compatible with the SQL semantics of -0.12 when upgrading 0.13. The possible values are: V_0_12, V_0_13. +:::tip + +There is a conflict of thrift version between IoTDB and Spark.Therefore, you need to execute `rm -f $SPARK_HOME/jars/libthrift*` and `cp $IOTDB_HOME/lib/libthrift* $SPARK_HOME/jars/` to resolve it. 
+ +::: + +## Supported DataSource Info + +| Datasource | Supported Versions | Url | +|------------|--------------------|----------------| +| IoTDB | `>= 0.13.0` | localhost:6667 | + +## Data Type Mapping + +| IotDB Data type | SeaTunnel Data type | +|-----------------|---------------------| +| BOOLEAN | BOOLEAN | +| INT32 | TINYINT | +| INT32 | SMALLINT | +| INT32 | INT | +| INT64 | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| TEXT | STRING | + +## Source Options + +| Name | Type | Required | Default Value | Description | +|----------------------------|---------|----------|---------------|------------------------------------------------------------------------------------| +| node_urls | string | yes | - | `IoTDB` cluster address, the format is `"host1:port"` or `"host1:port,host2:port"` | +| username | string | yes | - | `IoTDB` user username | +| password | string | yes | - | `IoTDB` user password | +| sql | string | yes | - | execute sql statement | +| schema | config | yes | - | the data schema | +| fetch_size | int | no | - | the fetch_size of the IoTDB when you select | +| lower_bound | long | no | - | the lower_bound of the IoTDB when you select | +| upper_bound | long | no | - | the upper_bound of the IoTDB when you select | +| num_partitions | int | no | - | the num_partitions of the IoTDB when you select | +| thrift_default_buffer_size | int | no | - | the thrift_default_buffer_size of the IoTDB when you select | +| thrift_max_frame_size | int | no | - | the thrift max frame size | +| enable_cache_leader | boolean | no | - | enable_cache_leader of the IoTDB when you select | +| version | string | no | - | SQL semantic version used by the client, The possible values are: V_0_12, V_0_13 | +| common-options | | no | - | | ### split partitions @@ -157,37 +106,37 @@ Source plugin common parameters, please refer to [Source Common Options](common- ## Examples -### Case1 - -Common options: - ```hocon +env { + execution.parallelism = 2 + job.mode = "BATCH" 
+} + source { IoTDB { node_urls = "localhost:6667" username = "root" password = "root" + sql = "SELECT temperature, moisture, c_int, c_bigint, c_float, c_double, c_string, c_boolean FROM root.test_group.* WHERE time < 4102329600000 align by device" + schema { + fields { + ts = timestamp + device_name = string + temperature = float + moisture = bigint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_string = string + c_boolean = boolean + } + } } } -``` - -When you assign `sql`、`fields`、`partition`, for example: -```hocon sink { - IoTDB { - ... - sql = "SELECT temperature, moisture FROM root.test_group.* WHERE time < 4102329600000 align by device" - lower_bound = 1 - upper_bound = 4102329600000 - num_partitions = 10 - fields { - ts = bigint - device_name = string - - temperature = float - moisture = bigint - } + Console { } } ``` @@ -195,23 +144,23 @@ sink { Upstream `IoTDB` data format is the following: ```shell -IoTDB> SELECT temperature, moisture FROM root.test_group.* WHERE time < 4102329600000 align by device; -+------------------------+------------------------+--------------+-----------+ -| Time| Device| temperature| moisture| -+------------------------+------------------------+--------------+-----------+ -|2022-09-25T00:00:00.001Z|root.test_group.device_a| 36.1| 100| -|2022-09-25T00:00:00.001Z|root.test_group.device_b| 36.2| 101| -|2022-09-25T00:00:00.001Z|root.test_group.device_c| 36.3| 102| -+------------------------+------------------------+--------------+-----------+ +IoTDB> SELECT temperature, moisture, c_int, c_bigint, c_float, c_double, c_string, c_boolean FROM root.test_group.* WHERE time < 4102329600000 align by device; ++------------------------+------------------------+--------------+-----------+--------+--------------+----------+---------+---------+----------+ +| Time| Device| temperature| moisture| c_int| c_bigint| c_float| c_double| c_string| c_boolean| 
++------------------------+------------------------+--------------+-----------+--------+--------------+----------+---------+---------+----------+ +|2022-09-25T00:00:00.001Z|root.test_group.device_a| 36.1| 100| 1| 21474836470| 1.0f| 1.0d| abc| true| +|2022-09-25T00:00:00.001Z|root.test_group.device_b| 36.2| 101| 2| 21474836470| 2.0f| 2.0d| abc| true| +|2022-09-25T00:00:00.001Z|root.test_group.device_c| 36.3| 102| 3| 21474836470| 3.0f| 3.0d| abc| true| ++------------------------+------------------------+--------------+-----------+--------+--------------+----------+---------+---------+----------+ ``` Loaded to SeaTunnelRow data format is the following: -| ts | device_name | temperature | moisture | -|---------------|--------------------------|-------------|----------| -| 1664035200001 | root.test_group.device_a | 36.1 | 100 | -| 1664035200001 | root.test_group.device_b | 36.2 | 101 | -| 1664035200001 | root.test_group.device_c | 36.3 | 102 | +| ts | device_name | temperature | moisture | c_int | c_bigint | c_float | c_double | c_string | c_boolean | +|---------------|--------------------------|-------------|----------|-------|-------------|---------|----------|----------|-----------| +| 1664035200001 | root.test_group.device_a | 36.1 | 100 | 1 | 21474836470 | 1.0f | 1.0d | abc | true | +| 1664035200001 | root.test_group.device_b | 36.2 | 101 | 2 | 21474836470 | 2.0f | 2.0d | abc | true | +| 1664035200001 | root.test_group.device_c | 36.3 | 102 | 3 | 21474836470 | 3.0f | 3.0d | abc | true | ## Changelog diff --git a/docs/en/connector-v2/source/Jdbc.md b/docs/en/connector-v2/source/Jdbc.md index a324316e5946..b86a7b33854b 100644 --- a/docs/en/connector-v2/source/Jdbc.md +++ b/docs/en/connector-v2/source/Jdbc.md @@ -76,11 +76,11 @@ The time in seconds to wait for the database operation used to validate the conn The column name for parallelism's partition, only support numeric type. 
-### partition_upper_bound [long] +### partition_upper_bound [BigDecimal] The partition_column max value for scan, if not set SeaTunnel will query database get max value. -### partition_lower_bound [long] +### partition_lower_bound [BigDecimal] The partition_column min value for scan, if not set SeaTunnel will query database get min value. @@ -125,6 +125,7 @@ there are some reference value for params above. | Snowflake | net.snowflake.client.jdbc.SnowflakeDriver | jdbc:snowflake://.snowflakecomputing.com | https://mvnrepository.com/artifact/net.snowflake/snowflake-jdbc | | Redshift | com.amazon.redshift.jdbc42.Driver | jdbc:redshift://localhost:5439/testdb?defaultRowFetchSize=1000 | https://mvnrepository.com/artifact/com.amazon.redshift/redshift-jdbc42 | | Vertica | com.vertica.jdbc.Driver | jdbc:vertica://localhost:5433 | https://repo1.maven.org/maven2/com/vertica/jdbc/vertica-jdbc/12.0.3-0/vertica-jdbc-12.0.3-0.jar | +| Kingbase | com.kingbase8.Driver | jdbc:kingbase8://localhost:54321/db_test | https://repo1.maven.org/maven2/cn/com/kingbase/kingbase8/8.6.0/kingbase8-8.6.0.jar | | OceanBase | com.oceanbase.jdbc.Driver | jdbc:oceanbase://localhost:2881 | https://repo1.maven.org/maven2/com/oceanbase/oceanbase-client/2.4.3/oceanbase-client-2.4.3.jar | ## Example @@ -145,15 +146,25 @@ Jdbc { parallel: ``` -Jdbc { - url = "jdbc:mysql://localhost/test?serverTimezone=GMT%2b8" - driver = "com.mysql.cj.jdbc.Driver" - connection_check_timeout_sec = 100 - user = "root" - password = "123456" - query = "select * from type_bin" - partition_column = "id" - partition_num = 10 +env { + execution.parallelism = 10 + job.mode = "BATCH" +} +source { + Jdbc { + url = "jdbc:mysql://localhost/test?serverTimezone=GMT%2b8" + driver = "com.mysql.cj.jdbc.Driver" + connection_check_timeout_sec = 100 + user = "root" + password = "123456" + query = "select * from type_bin" + partition_column = "id" + partition_num = 10 + } +} + +sink { + Console {} } ``` diff --git 
a/docs/en/connector-v2/source/Jira.md b/docs/en/connector-v2/source/Jira.md index 6452b66c9312..dcfe6cc11d37 100644 --- a/docs/en/connector-v2/source/Jira.md +++ b/docs/en/connector-v2/source/Jira.md @@ -29,7 +29,7 @@ Used to read data from Jira. | body | String | No | - | | json_field | Config | No | - | | content_json | String | No | - | -| poll_interval_ms | int | No | - | +| poll_interval_millis | int | No | - | | retry | int | No | - | | retry_backoff_multiplier_ms | int | No | 100 | | retry_backoff_max_ms | int | No | 10000 | @@ -62,7 +62,7 @@ http params http body -### poll_interval_ms [int] +### poll_interval_millis [int] request http api interval(millis) in stream mode diff --git a/docs/en/connector-v2/source/Kingbase.md b/docs/en/connector-v2/source/Kingbase.md new file mode 100644 index 000000000000..62e280675dd7 --- /dev/null +++ b/docs/en/connector-v2/source/Kingbase.md @@ -0,0 +1,148 @@ +# Kingbase + +> JDBC Kingbase Source Connector + +## Support Connector Version + +- 8.6 + +## Support Those Engines + +> Spark
+> Flink
+> SeaTunnel Zeta
+ +## Key Features + +- [x] [batch](../../concept/connector-v2-features.md) +- [ ] [stream](../../concept/connector-v2-features.md) +- [ ] [exactly-once](../../concept/connector-v2-features.md) +- [x] [column projection](../../concept/connector-v2-features.md) +- [x] [parallelism](../../concept/connector-v2-features.md) +- [x] [support user-defined split](../../concept/connector-v2-features.md) + +## Description + +Read external data source data through JDBC. + +## Supported DataSource Info + +| Datasource | Supported versions | Driver | Url | Maven | +|------------|--------------------|----------------------|------------------------------------------|------------------------------------------------------------------------------------------------| +| Kingbase | 8.6 | com.kingbase8.Driver | jdbc:kingbase8://localhost:54321/db_test | [Download](https://repo1.maven.org/maven2/cn/com/kingbase/kingbase8/8.6.0/kingbase8-8.6.0.jar) | + +## Database Dependency + +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example: cp kingbase8-8.6.0.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ + +## Data Type Mapping + +| Kingbase Data type | SeaTunnel Data type | +|-------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------| +| BOOL | BOOLEAN | +| INT2 | SHORT | +| SMALLSERIAL
SERIAL
INT4 | INT | +| INT8
BIGSERIAL | BIGINT | +| FLOAT4 | FLOAT | +| FLOAT8 | DOUBLE | +| NUMERIC | DECIMAL((Get the designated column's specified column size),
(Gets the designated column's number of digits to the right of the decimal point.)) | +| BPCHAR
CHARACTER
VARCHAR
TEXT | STRING | +| TIMESTAMP | LOCALDATETIME | +| TIME | LOCALTIME | +| DATE | LOCALDATE | +| Other data type | Not supported yet | + +## Source Options + +| Name | Type | Required | Default | Description | +|------------------------------|------------|----------|-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:kingbase8://localhost:54321/test | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source, should be `com.kingbase8.Driver`. | +| user | String | No | - | Connection instance user name | +| password | String | No | - | Connection instance password | +| query | String | Yes | - | Query statement | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | +| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type column and string type column. | +| partition_lower_bound | BigDecimal | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | +| partition_upper_bound | BigDecimal | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | +| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. Default value is job parallelism. | +| fetch_size | Int | No | 0 | For queries that return a large number of objects, you can configure
the row fetch size used in the query to improve performance by
reducing the number of database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | + +### Tips + +> If partition_column is not set, it will run in single concurrency, and if partition_column is set, it will be executed in parallel according to the concurrency of tasks. + +## Task Example + +### Simple: + +``` +env { + execution.parallelism = 2 + job.mode = "BATCH" +} + +source { + Jdbc { + driver = "com.kingbase8.Driver" + url = "jdbc:kingbase8://localhost:54321/db_test" + user = "root" + password = "" + query = "select * from source" + } +} + +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/transform/sql +} + +sink { + Console {} +} +``` + +### Parallel: + +> Read your query table in parallel with the shard field you configured and the shard data. You can do this if you want to read the whole table + +``` +source { + Jdbc { + driver = "com.kingbase8.Driver" + url = "jdbc:kingbase8://localhost:54321/db_test" + user = "root" + password = "" + query = "select * from source" + # Parallel sharding reads fields + partition_column = "id" + # Number of fragments + partition_num = 10 + } +} +``` + +### Parallel Boundary: + +> It is more efficient to read your data source according to the upper and lower boundaries you configured + +``` +source { + Jdbc { + driver = "com.kingbase8.Driver" + url = "jdbc:kingbase8://localhost:54321/db_test" + user = "root" + password = "" + query = "select * from source" + partition_column = "id" + partition_num = 10 + # Read start boundary + partition_lower_bound = 1 + # Read end boundary + partition_upper_bound = 500 + } +} +``` + diff --git a/docs/en/connector-v2/source/Klaviyo.md b/docs/en/connector-v2/source/Klaviyo.md index 20ed8ded5015..e80a2434fdf1 100644 --- a/docs/en/connector-v2/source/Klaviyo.md +++ 
b/docs/en/connector-v2/source/Klaviyo.md @@ -30,7 +30,7 @@ Used to read data from Klaviyo. | body | String | No | - | | json_field | Config | No | - | | content_json | String | No | - | -| poll_interval_ms | int | No | - | +| poll_interval_millis | int | No | - | | retry | int | No | - | | retry_backoff_multiplier_ms | int | No | 100 | | retry_backoff_max_ms | int | No | 10000 | @@ -63,7 +63,7 @@ http params http body -### poll_interval_ms [int] +### poll_interval_millis [int] request http api interval(millis) in stream mode diff --git a/docs/en/connector-v2/source/Lemlist.md b/docs/en/connector-v2/source/Lemlist.md index 5e7c4138c581..76cac3b9bf81 100644 --- a/docs/en/connector-v2/source/Lemlist.md +++ b/docs/en/connector-v2/source/Lemlist.md @@ -28,7 +28,7 @@ Used to read data from Lemlist. | body | String | No | - | | json_field | Config | No | - | | content_json | String | No | - | -| poll_interval_ms | int | No | - | +| poll_interval_millis | int | No | - | | retry | int | No | - | | retry_backoff_multiplier_ms | int | No | 100 | | retry_backoff_max_ms | int | No | 10000 | @@ -57,7 +57,7 @@ http params http body -### poll_interval_ms [int] +### poll_interval_millis [int] request http api interval(millis) in stream mode diff --git a/docs/en/connector-v2/source/MongoDB.md b/docs/en/connector-v2/source/MongoDB.md index 137fb205b8c9..d63d303fa248 100644 --- a/docs/en/connector-v2/source/MongoDB.md +++ b/docs/en/connector-v2/source/MongoDB.md @@ -283,6 +283,10 @@ By utilizing `flat.sync-string`, only one field attribute value can be set, and This operation will perform a string mapping on a single MongoDB data entry. 
```bash +env { + execution.parallelism = 10 + job.mode = "BATCH" +} source { MongoDB { uri = "mongodb://user:password@127.0.0.1:27017" @@ -296,6 +300,9 @@ source { } } } +sink { + Console {} +} ``` Use the data samples synchronized with modified parameters, such as the following: diff --git a/docs/en/connector-v2/source/MyHours.md b/docs/en/connector-v2/source/MyHours.md index ec3a93553364..91321990ab2b 100644 --- a/docs/en/connector-v2/source/MyHours.md +++ b/docs/en/connector-v2/source/MyHours.md @@ -2,11 +2,13 @@ > My Hours source connector -## Description +## Support Those Engines -Used to read data from My Hours. +> Spark
+> Flink
+> SeaTunnel Zeta
-## Key features +## Key Features - [x] [batch](../../concept/connector-v2-features.md) - [ ] [stream](../../concept/connector-v2-features.md) @@ -15,71 +17,103 @@ Used to read data from My Hours. - [ ] [parallelism](../../concept/connector-v2-features.md) - [ ] [support user-defined split](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|-----------------------------|---------|----------|---------------| -| url | String | Yes | - | -| email | String | Yes | - | -| password | String | Yes | - | -| method | String | No | get | -| schema | Config | No | - | -| schema.fields | Config | No | - | -| format | String | No | json | -| params | Map | No | - | -| body | String | No | - | -| json_field | Config | No | - | -| content_json | String | No | - | -| poll_interval_ms | int | No | - | -| retry | int | No | - | -| retry_backoff_multiplier_ms | int | No | 100 | -| retry_backoff_max_ms | int | No | 10000 | -| enable_multi_lines | boolean | No | false | -| common-options | config | No | - | - -### url [String] - -http request url - -### email [String] - -email for login - -### password [String] - -password for login - -### method [String] - -http request method, only supports GET, POST method - -### params [Map] - -http params - -### body [String] - -http body - -### poll_interval_ms [int] +## Description -request http api interval(millis) in stream mode +Used to read data from My Hours. 
-### retry [int] +## Key features -The max retry times if request http return to `IOException` +- [x] [batch](../../concept/connector-v2-features.md) +- [ ] [stream](../../concept/connector-v2-features.md) +- [ ] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [column projection](../../concept/connector-v2-features.md) +- [ ] [parallelism](../../concept/connector-v2-features.md) +- [ ] [support user-defined split](../../concept/connector-v2-features.md) -### retry_backoff_multiplier_ms [int] +## Supported DataSource Info + +In order to use the My Hours connector, the following dependencies are required. +They can be downloaded via install-plugin.sh or from the Maven central repository. + +| Datasource | Supported Versions | Dependency | +|------------|--------------------|---------------------------------------------------------------------------------------------| +| My Hours | universal | [Download](https://mvnrepository.com/artifact/org.apache.seatunnel/seatunnel-connectors-v2) | + +## Source Options + +| Name | Type | Required | Default | Description | +|-----------------------------|---------|----------|---------|--------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | Http request url. | +| email | String | Yes | - | My hours login email address. | +| password | String | Yes | - | My hours login password. | +| schema | Config | No | - | Http and seatunnel data structure mapping | +| schema.fields | Config | No | - | The schema fields of upstream data | +| json_field | Config | No | - | This parameter helps you configure the schema,so this parameter must be used with schema. | +| content_json | String | No | - | This parameter can get some json data.If you only need the data in the 'book' section, configure `content_field = "$.store.book.*"`. 
| +| format | String | No | json | The format of upstream data, now only support `json` `text`, default `json`. | +| method | String | No | get | Http request method, only supports GET, POST method. | +| headers | Map | No | - | Http headers. | +| params | Map | No | - | Http params. | +| body | String | No | - | Http body. | +| poll_interval_millis | Int | No | - | Request http api interval(millis) in stream mode. | +| retry | Int | No | - | The max retry times if request http return to `IOException`. | +| retry_backoff_multiplier_ms | Int | No | 100 | The retry-backoff times(millis) multiplier if request http failed. | +| retry_backoff_max_ms | Int | No | 10000 | The maximum retry-backoff times(millis) if request http failed | +| enable_multi_lines | Boolean | No | false | | +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | + +## How to Create a My Hours Data Synchronization Jobs -The retry-backoff times(millis) multiplier if request http failed +```hocon +env { + execution.parallelism = 1 + job.mode = "BATCH" +} -### retry_backoff_max_ms [int] +MyHours{ + url = "https://api2.myhours.com/api/Projects/getAll" + email = "seatunnel@test.com" + password = "seatunnel" + schema { + fields { + name = string + archived = boolean + dateArchived = string + dateCreated = string + clientName = string + budgetAlertPercent = string + budgetType = int + totalTimeLogged = double + budgetValue = double + totalAmount = double + totalExpense = double + laborCost = double + totalCost = double + billableTimeLogged = double + totalBillableAmount = double + billable = boolean + roundType = int + roundInterval = int + budgetSpentPercentage = double + budgetTarget = int + budgetPeriodType = string + budgetSpent = string + id = string + } + } +} -The maximum retry-backoff times(millis) if request http failed +# Console printing of the read data +sink { + Console { + parallelism = 1 + } +} +``` -### 
format [String] +## Parameter Interpretation -the format of upstream data, now only support `json` `text`, default `json`. +### format when you assign format is `json`, you should also assign schema option, for example: @@ -98,11 +132,11 @@ you should assign schema as the following: ```hocon schema { - fields { - code = int - data = string - success = boolean - } + fields { + code = int + data = string + success = boolean + } } ``` @@ -131,13 +165,7 @@ connector will generate data as the following: |----------------------------------------------------------| | {"code": 200, "data": "get success", "success": true} | -### schema [Config] - -#### fields [Config] - -the schema fields of upstream data - -### content_json [String] +### content_json This parameter can get some json data.If you only need the data in the 'book' section, configure `content_field = "$.store.book.*"`. @@ -212,14 +240,14 @@ Here is an example: - Test data can be found at this link [mockserver-config.json](../../../../seatunnel-e2e/seatunnel-connector-v2-e2e/connector-http-e2e/src/test/resources/mockserver-config.json) - See this link for task configuration [http_contentjson_to_assert.conf](../../../../seatunnel-e2e/seatunnel-connector-v2-e2e/connector-http-e2e/src/test/resources/http_contentjson_to_assert.conf). -### json_field [Config] +### json_field This parameter helps you configure the schema,so this parameter must be used with schema. If your data looks something like this: ```json -{ +{ "store": { "book": [ { @@ -273,47 +301,6 @@ source { - Test data can be found at this link [mockserver-config.json](../../../../seatunnel-e2e/seatunnel-connector-v2-e2e/connector-http-e2e/src/test/resources/mockserver-config.json) - See this link for task configuration [http_jsonpath_to_assert.conf](../../../../seatunnel-e2e/seatunnel-connector-v2-e2e/connector-http-e2e/src/test/resources/http_jsonpath_to_assert.conf). 
-### common options - -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details - -## Example - -```hocon -MyHours{ - url = "https://api2.myhours.com/api/Projects/getAll" - email = "seatunnel@test.com" - password = "seatunnel" - schema { - fields { - name = string - archived = boolean - dateArchived = string - dateCreated = string - clientName = string - budgetAlertPercent = string - budgetType = int - totalTimeLogged = double - budgetValue = double - totalAmount = double - totalExpense = double - laborCost = double - totalCost = double - billableTimeLogged = double - totalBillableAmount = double - billable = boolean - roundType = int - roundInterval = int - budgetSpentPercentage = double - budgetTarget = int - budgetPeriodType = string - budgetSpent = string - id = string - } - } -} -``` - ## Changelog ### next version diff --git a/docs/en/connector-v2/source/MySQL-CDC.md b/docs/en/connector-v2/source/MySQL-CDC.md index caeeca062836..6740fd4b8b2e 100644 --- a/docs/en/connector-v2/source/MySQL-CDC.md +++ b/docs/en/connector-v2/source/MySQL-CDC.md @@ -2,10 +2,9 @@ > MySQL CDC source connector -## Description +## Support Those Engines -The MySQL CDC connector allows for reading snapshot data and incremental data from MySQL database. This document -describes how to set up the MySQL CDC connector to run SQL queries against MySQL databases. +> SeaTunnel Zeta
## Key features @@ -16,207 +15,202 @@ describes how to set up the MySQL CDC connector to run SQL queries against MySQL - [x] [parallelism](../../concept/connector-v2-features.md) - [x] [support user-defined split](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|------------------------------------------------|----------|----------|---------------| -| username | String | Yes | - | -| password | String | Yes | - | -| database-names | List | No | - | -| table-names | List | Yes | - | -| base-url | String | Yes | - | -| startup.mode | Enum | No | INITIAL | -| startup.timestamp | Long | No | - | -| startup.specific-offset.file | String | No | - | -| startup.specific-offset.pos | Long | No | - | -| stop.mode | Enum | No | NEVER | -| stop.timestamp | Long | No | - | -| stop.specific-offset.file | String | No | - | -| stop.specific-offset.pos | Long | No | - | -| incremental.parallelism | Integer | No | 1 | -| snapshot.split.size | Integer | No | 8096 | -| snapshot.fetch.size | Integer | No | 1024 | -| server-id | String | No | - | -| server-time-zone | String | No | UTC | -| connect.timeout.ms | Duration | No | 30000 | -| connect.max-retries | Integer | No | 3 | -| connection.pool.size | Integer | No | 20 | -| chunk-key.even-distribution.factor.upper-bound | Double | No | 100 | -| chunk-key.even-distribution.factor.lower-bound | Double | No | 0.05 | -| sample-sharding.threshold | int | No | 1000 | -| inverse-sampling.rate | int | No | 1000 | -| exactly_once | Boolean | No | true | -| debezium.* | config | No | - | -| format | Enum | No | DEFAULT | -| common-options | | no | - | - -### username [String] - -Name of the database to use when connecting to the database server. - -### password [String] - -Password to use when connecting to the database server. - -### database-names [List] - -Database name of the database to monitor. - -### table-names [List] - -Table name of the database to monitor. 
The table name needs to include the database name, for example: database_name.table_name - -### base-url [String] - -URL has to be with database, like "jdbc:mysql://localhost:5432/db" or "jdbc:mysql://localhost:5432/db?useSSL=true". - -### startup.mode [Enum] - -Optional startup mode for MySQL CDC consumer, valid enumerations are "initial", "earliest", "latest" and "specific". - -### startup.timestamp [Long] - -Start from the specified epoch timestamp (in milliseconds). - -**Note, This option is required when the "startup.mode" option used `'timestamp'`.** - -### startup.specific-offset.file [String] - -Start from the specified binlog file name. - -**Note, This option is required when the "startup.mode" option used `'specific'`.** - -### startup.specific-offset.pos [Long] - -Start from the specified binlog file position. - -**Note, This option is required when the "startup.mode" option used `'specific'`.** - -### stop.mode [Enum] - -Optional stop mode for MySQL CDC consumer, valid enumerations are "never". - -### stop.timestamp [Long] - -Stop from the specified epoch timestamp (in milliseconds). - -**Note, This option is required when the "stop.mode" option used `'timestamp'`.** - -### stop.specific-offset.file [String] - -Stop from the specified binlog file name. - -**Note, This option is required when the "stop.mode" option used `'specific'`.** - -### stop.specific-offset.pos [Long] - -Stop from the specified binlog file position. - -**Note, This option is required when the "stop.mode" option used `'specific'`.** - -### incremental.parallelism [Integer] - -The number of parallel readers in the incremental phase. - -### snapshot.split.size [Integer] - -The split size (number of rows) of table snapshot, captured tables are split into multiple splits when read the snapshot -of table. - -### snapshot.fetch.size [Integer] - -The maximum fetch size for per poll when read table snapshot. 
- -### chunk-key.even-distribution.factor.upper-bound [Double] - -The upper bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be less than or equal to this upper bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is greater, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 100.0. - -### chunk-key.even-distribution.factor.lower-bound [Double] +## Description -The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. +The MySQL CDC connector allows for reading snapshot data and incremental data from MySQL database. This document +describes how to set up the MySQL CDC connector to run SQL queries against MySQL databases. -### sample-sharding.threshold [Integer] +## Supported DataSource Info -This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. 
When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards. +| Datasource | Supported versions | Driver | Url | Maven | +|------------|-------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|----------------------------------|----------------------------------------------------------------------| +| MySQL |
  • [MySQL](https://dev.mysql.com/doc): 5.6, 5.7, 8.0.x
  • [RDS MySQL](https://www.aliyun.com/product/rds/mysql): 5.6, 5.7, 8.0.x
  • | com.mysql.cj.jdbc.Driver | jdbc:mysql://localhost:3306/test | https://mvnrepository.com/artifact/mysql/mysql-connector-java/8.0.28 | -### inverse-sampling.rate [Integer] +## Database Dependency -The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. +### Install Jdbc Driver -### server-id [String] +Please download and put mysql driver in `${SEATUNNEL_HOME}/lib/` dir. For example: cp mysql-connector-java-xxx.jar `$SEATUNNEL_HOME/lib/` -A numeric ID or a numeric ID range of this database client, The numeric ID syntax is like '5400', the numeric ID range -syntax is like '5400-5408'. +### Creating MySQL user -Every ID must be unique across all currently-running database processes in the MySQL cluster. This connector joins the -MySQL cluster as another server (with this unique ID) so it can read the binlog. +You have to define a MySQL user with appropriate permissions on all databases that the Debezium MySQL connector monitors. -By default, a random number is generated between 5400 and 6400, though we recommend setting an explicit value. +1. Create the MySQL user: -### server-time-zone [String] +```sql +mysql> CREATE USER 'user'@'localhost' IDENTIFIED BY 'password'; +``` -The session time zone in database server. If not set, then ZoneId.systemDefault() is used to determine the server time zone. +2. 
Grant the required permissions to the user: -### connect.timeout.ms [long] +```sql +mysql> GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'user' IDENTIFIED BY 'password'; +``` -The maximum time that the connector should wait after trying to connect to the database server before timing out. +3. Finalize the user’s permissions: -### connect.max-retries [Integer] +```sql +mysql> FLUSH PRIVILEGES; +``` -The max retry times that the connector should retry to build database server connection. +### Enabling the MySQL binlog -### connection.pool.size [Integer] +You must enable binary logging for MySQL replication. The binary logs record transaction updates for replication tools to propagate changes. -The connection pool size. +1. Check whether the `log-bin` option is already on: -### exactly_once [Boolean] +```sql +mysql> show variables where variable_name in ('log_bin', 'binlog_format', 'binlog_row_image', 'gtid_mode', 'enforce_gtid_consistency'); ++--------------------------+----------------+ +| Variable_name | Value | ++--------------------------+----------------+ +| binlog_format | ROW | +| binlog_row_image | FULL | +| enforce_gtid_consistency | ON | +| gtid_mode | ON | +| log_bin | ON | ++--------------------------+----------------+ +5 rows in set (0.00 sec) +``` -Enable exactly once semantic. +2. If inconsistent with the above results, configure your MySQL server configuration file(`$MYSQL_HOME/mysql.cnf`) with the following properties, which are described in the table below: -### debezium [Config] +``` +# Enable binary replication log and set the prefix, expiration, and log format. +# The prefix is arbitrary, expiration can be short for integration tests but would +# be longer on a production system. Row-level info is required for ingest to work. 
+# Server ID is required, but this will vary on production systems +server-id = 223344 +log_bin = mysql-bin +expire_logs_days = 10 +binlog_format = row +binlog_row_image = FULL + +# enable gtid mode +gtid_mode = on +enforce_gtid_consistency = on +``` -Pass-through Debezium's properties to Debezium Embedded Engine which is used to capture data changes from MySQL server. +3. Restart MySQL Server -See more about -the [Debezium's MySQL Connector properties](https://debezium.io/documentation/reference/1.6/connectors/mysql.html#mysql-connector-properties) +```shell +/etc/init.d/mysqld restart +``` -### format [Enum] +4. Confirm your changes by checking the binlog status once more: + +```sql +mysql> show variables where variable_name in ('log_bin', 'binlog_format', 'binlog_row_image', 'gtid_mode', 'enforce_gtid_consistency'); ++--------------------------+----------------+ +| Variable_name | Value | ++--------------------------+----------------+ +| binlog_format | ROW | +| binlog_row_image | FULL | +| enforce_gtid_consistency | ON | +| gtid_mode | ON | +| log_bin | ON | ++--------------------------+----------------+ +5 rows in set (0.00 sec) +``` -Optional output format for MySQL CDC, valid enumerations are "DEFAULT"、"COMPATIBLE_DEBEZIUM_JSON". +### Notes + +#### Setting up MySQL session timeouts + +When an initial consistent snapshot is made for large databases, your established connection could timeout while the tables are being read. You can prevent this behavior by configuring interactive_timeout and wait_timeout in your MySQL configuration file. +- `interactive_timeout`: The number of seconds the server waits for activity on an interactive connection before closing it. See [MySQL’s documentation](https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_interactive_timeout) for more details. +- `wait_timeout`: The number of seconds the server waits for activity on a non-interactive connection before closing it. 
See [MySQL’s documentation](https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_wait_timeout) for more details. + +*For more database settings see [Debezium MySQL Connector](https://debezium.io/documentation/reference/1.6/connectors/mysql.html#setting-up-mysql)* + +## Data Type Mapping + +| Mysql Data type | SeaTunnel Data type | +|------------------------------------------------------------------------------------------|---------------------| +| BIT(1)
    TINYINT(1) | BOOLEAN | +| TINYINT | TINYINT | +| TINYINT UNSIGNED
    SMALLINT | SMALLINT | +| SMALLINT UNSIGNED
    MEDIUMINT
    MEDIUMINT UNSIGNED
    INT
    INTEGER
    YEAR | INT | +| INT UNSIGNED
    INTEGER UNSIGNED
    BIGINT | BIGINT | +| BIGINT UNSIGNED | DECIMAL(20,0) | +| DECIMAL(p, s)
    DECIMAL(p, s) UNSIGNED
    NUMERIC(p, s)
    NUMERIC(p, s) UNSIGNED | DECIMAL(p,s) | +| FLOAT
    FLOAT UNSIGNED | FLOAT | +| DOUBLE
    DOUBLE UNSIGNED
    REAL
    REAL UNSIGNED | DOUBLE | +| CHAR
    VARCHAR
    TINYTEXT
    MEDIUMTEXT
    TEXT
    LONGTEXT
    ENUM
    JSON | STRING | +| DATE | DATE | +| TIME | TIME | +| DATETIME
    TIMESTAMP | TIMESTAMP | +| BINARY
    VARBINARY
    BIT(p)
    TINYBLOB
    MEDIUMBLOB
    BLOB
    LONGBLOB | BYTES | + +## Source Options + +| Name | Type | Required | Default | Description | +|------------------------------------------------|----------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| base-url | String | Yes | - | The URL of the JDBC connection. Refer to a case: `jdbc:mysql://localhost:3306:3306/test`. | +| username | String | Yes | - | Name of the database to use when connecting to the database server. | +| password | String | Yes | - | Password to use when connecting to the database server. | +| database-names | List | No | - | Database name of the database to monitor. | +| table-names | List | Yes | - | Table name of the database to monitor. The table name needs to include the database name, for example: `database_name.table_name` | +| startup.mode | Enum | No | INITIAL | Optional startup mode for MySQL CDC consumer, valid enumerations are `initial`, `earliest`, `latest` and `specific`.
    `initial`: Synchronize historical data at startup, and then synchronize incremental data.
    `earliest`: Startup from the earliest offset possible.
    `latest`: Startup from the latest offset.
    `specific`: Startup from user-supplied specific offsets. | +| startup.specific-offset.file | String | No | - | Start from the specified binlog file name. **Note, This option is required when the `startup.mode` option used `specific`.** | +| startup.specific-offset.pos | Long | No | - | Start from the specified binlog file position. **Note, This option is required when the `startup.mode` option used `specific`.** | +| stop.mode | Enum | No | NEVER | Optional stop mode for MySQL CDC consumer, valid enumerations are `never`, `latest` or `specific`.
    `never`: The real-time job doesn't stop the source.
    `latest`: Stop from the latest offset.
    `specific`: Stop from user-supplied specific offset. | +| stop.specific-offset.file | String | No | - | Stop from the specified binlog file name. **Note, This option is required when the `stop.mode` option used `specific`.** | +| stop.specific-offset.pos | Long | No | - | Stop from the specified binlog file position. **Note, This option is required when the `stop.mode` option used `specific`.** | +| snapshot.split.size | Integer | No | 8096 | The split size (number of rows) of table snapshot, captured tables are split into multiple splits when read the snapshot of table. | +| snapshot.fetch.size | Integer | No | 1024 | The maximum fetch size for per poll when read table snapshot. | +| server-id | String | No | - | A numeric ID or a numeric ID range of this database client, The numeric ID syntax is like `5400`, the numeric ID range syntax is like '5400-5408'.
    Every ID must be unique across all currently-running database processes in the MySQL cluster. This connector joins the
    MySQL cluster as another server (with this unique ID) so it can read the binlog.
    By default, a random number is generated between 5400 and 6400, though we recommend setting an explicit value. | +| server-time-zone | String | No | UTC | The session time zone in database server. If not set, then ZoneId.systemDefault() is used to determine the server time zone. | +| connect.timeout.ms | Duration | No | 30000 | The maximum time that the connector should wait after trying to connect to the database server before timing out. | +| connect.max-retries | Integer | No | 3 | The max retry times that the connector should retry to build database server connection. | +| connection.pool.size | Integer | No | 20 | The jdbc connection pool size. | +| chunk-key.even-distribution.factor.upper-bound | Double | No | 100 | The upper bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be less than or equal to this upper bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is greater, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 100.0. | +| chunk-key.even-distribution.factor.lower-bound | Double | No | 0.05 | The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. 
The default value is 0.05. | +| sample-sharding.threshold | Integer | No | 1000 | This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards. | +| inverse-sampling.rate | Integer | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. | +| exactly_once | Boolean | No | true | Enable exactly once semantic. | +| format | Enum | No | DEFAULT | Optional output format for MySQL CDC, valid enumerations are `DEFAULT`、`COMPATIBLE_DEBEZIUM_JSON`. | +| debezium | Config | No | - | Pass-through [Debezium's properties](https://debezium.io/documentation/reference/1.6/connectors/mysql.html#mysql-connector-properties) to Debezium Embedded Engine which is used to capture data changes from MySQL server. 
| +| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | + +## Task Example + +### Simple + +> Support multi-table reading -#### example +``` +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 10000 +} -```conf source { MySQL-CDC { - debezium { - snapshot.mode = "never" - decimal.handling.mode = "double" + catalog = { + factory = MySQL } + base-url = "jdbc:mysql://localhost:3306/testdb" + username = "root" + password = "root@123" + table-names = ["testdb.table1", "testdb.table2"] + + startup.mode = "initial" } } -``` - -### common options - -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. - -## Example -```Jdbc { -source { - MySQL-CDC { - result_table_name = "fake" - parallelism = 1 - server-id = 5656 - username = "mysqluser" - password = "mysqlpw" - table-names = ["inventory_vwyw0n.products"] - base-url = "jdbc:mysql://localhost:56725/inventory_vwyw0n" +sink { + Console { } } ``` +### Support debezium-compatible format send to kafka + +> Must be used with kafka connector sink, see [compatible debezium format](../formats/cdc-compatible-debezium-json.md) for details + ## Changelog - Add MySQL CDC Source Connector diff --git a/docs/en/connector-v2/source/Mysql.md b/docs/en/connector-v2/source/Mysql.md index d04c7eec3020..bdac5c0aec61 100644 --- a/docs/en/connector-v2/source/Mysql.md +++ b/docs/en/connector-v2/source/Mysql.md @@ -2,6 +2,10 @@ > JDBC Mysql Source Connector +## Support Mysql Version + +- 5.5/5.6/5.7/8.0 + ## Support Those Engines > Spark
    @@ -56,24 +60,24 @@ Read external data source data through JDBC. ## Source Options -| Name | Type | Required | Default | Description | -|------------------------------|--------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:mysql://localhost:3306:3306/test | -| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
    if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. | -| user | String | No | - | Connection instance user name | -| password | String | No | - | Connection instance password | -| query | String | Yes | - | Query statement | -| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | -| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | -| partition_lower_bound | Long | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | -| partition_upper_bound | Long | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | -| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | -| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
    the row fetch size used in the query toimprove performance by
    reducing the number database hits required to satisfy the selection criteria.
    Zero means use jdbc default value. | -| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | +| Name | Type | Required | Default | Description | +|------------------------------|------------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:mysql://localhost:3306:3306/test | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
    if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. | +| user | String | No | - | Connection instance user name | +| password | String | No | - | Connection instance password | +| query | String | Yes | - | Query statement | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | +| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | +| partition_lower_bound | BigDecimal | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | +| partition_upper_bound | BigDecimal | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | +| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | +| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
    the row fetch size used in the query to improve performance by
    reducing the number of database hits required to satisfy the selection criteria.
    Zero means use jdbc default value. | +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | ### Tips -> If partition_column is not set, it will run in single concurrency, and if partition_column is set, it will be executed in parallel according to the concurrency of tasks. +> If partition_column is not set, it will run in single concurrency, and if partition_column is set, it will be executed in parallel according to the concurrency of tasks , When your shard read field is a large number type such as bigint(30) and above and the data is not evenly distributed, it is recommended to set the parallelism level to 1 to ensure that the data skew problem is resolved ## Task Example @@ -90,7 +94,7 @@ env { } source{ Jdbc { - url = "jdbc:mysql://localhost:3306/test?serverTimezone=GMT%2b8" + url = "jdbc:mysql://localhost:3306/test?serverTimezone=GMT%2b8&useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true" driver = "com.mysql.cj.jdbc.Driver" connection_check_timeout_sec = 100 user = "root" @@ -114,9 +118,13 @@ sink { > Read your query table in parallel with the shard field you configured and the shard data You can do this if you want to read the whole table ``` +env { + execution.parallelism = 10 + job.mode = "BATCH" +} source { Jdbc { - url = "jdbc:mysql://localhost:3306/test?serverTimezone=GMT%2b8" + url = "jdbc:mysql://localhost:3306/test?serverTimezone=GMT%2b8&useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true" driver = "com.mysql.cj.jdbc.Driver" connection_check_timeout_sec = 100 user = "root" @@ -129,6 +137,9 @@ source { partition_num = 10 } } +sink { + Console {} +} ``` ### Parallel Boundary: @@ -138,7 +149,7 @@ source { ``` source { Jdbc { - url = "jdbc:mysql://localhost:3306/test?serverTimezone=GMT%2b8" + url = "jdbc:mysql://localhost:3306/test?serverTimezone=GMT%2b8&useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true" driver = 
"com.mysql.cj.jdbc.Driver" connection_check_timeout_sec = 100 user = "root" diff --git a/docs/en/connector-v2/source/Notion.md b/docs/en/connector-v2/source/Notion.md index 186294c6874d..d138c21c1d69 100644 --- a/docs/en/connector-v2/source/Notion.md +++ b/docs/en/connector-v2/source/Notion.md @@ -29,7 +29,7 @@ Used to read data from Notion. | body | String | No | - | | json_field | Config | No | - | | content_json | String | No | - | -| poll_interval_ms | int | No | - | +| poll_interval_millis | int | No | - | | retry | int | No | - | | retry_backoff_multiplier_ms | int | No | 100 | | retry_backoff_max_ms | int | No | 10000 | @@ -62,7 +62,7 @@ http params http body -### poll_interval_ms [int] +### poll_interval_millis [int] request http api interval(millis) in stream mode diff --git a/docs/en/connector-v2/source/OceanBase.md b/docs/en/connector-v2/source/OceanBase.md index 9625ef4fbb94..434e25284ddc 100644 --- a/docs/en/connector-v2/source/OceanBase.md +++ b/docs/en/connector-v2/source/OceanBase.md @@ -71,21 +71,21 @@ Read external data source data through JDBC. ## Source Options -| Name | Type | Required | Default | Description | -|------------------------------|--------|----------|-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:oceanbase://localhost:2883/test | -| driver | String | Yes | - | The jdbc class name used to connect to the remote data source, should be `com.oceanbase.jdbc.Driver`. | -| user | String | No | - | Connection instance user name | -| password | String | No | - | Connection instance password | -| compatible_mode | String | Yes | - | The compatible mode of OceanBase, can be 'mysql' or 'oracle'. 
| -| query | String | Yes | - | Query statement | -| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | -| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type column and string type column. | -| partition_lower_bound | Long | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | -| partition_upper_bound | Long | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | -| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. Default value is job parallelism. | -| fetch_size | Int | No | 0 | For queries that return a large number of objects, you can configure
    the row fetch size used in the query to improve performance by
    reducing the number database hits required to satisfy the selection criteria.
    Zero means use jdbc default value. | -| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | +| Name | Type | Required | Default | Description | +|------------------------------|------------|----------|-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:oceanbase://localhost:2883/test | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source, should be `com.oceanbase.jdbc.Driver`. | +| user | String | No | - | Connection instance user name | +| password | String | No | - | Connection instance password | +| compatible_mode | String | Yes | - | The compatible mode of OceanBase, can be 'mysql' or 'oracle'. | +| query | String | Yes | - | Query statement | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | +| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type column and string type column. | +| partition_lower_bound | BigDecimal | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | +| partition_upper_bound | BigDecimal | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | +| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. Default value is job parallelism. | +| fetch_size | Int | No | 0 | For queries that return a large number of objects, you can configure
    the row fetch size used in the query to improve performance by
    reducing the number of database hits required to satisfy the selection criteria.
    Zero means use jdbc default value. | +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | ### Tips @@ -127,6 +127,10 @@ sink { > Read your query table in parallel with the shard field you configured and the shard data. You can do this if you want to read the whole table ``` +env { + execution.parallelism = 10 + job.mode = "BATCH" +} source { Jdbc { driver = "com.oceanbase.jdbc.Driver" @@ -141,6 +145,9 @@ source { partition_num = 10 } } +sink { + Console {} +} ``` ### Parallel Boundary: diff --git a/docs/en/connector-v2/source/OneSignal.md b/docs/en/connector-v2/source/OneSignal.md index 52636cf5bdac..9fb6d65379be 100644 --- a/docs/en/connector-v2/source/OneSignal.md +++ b/docs/en/connector-v2/source/OneSignal.md @@ -29,7 +29,7 @@ Used to read data from OneSignal. | body | String | No | - | | json_field | Config | No | - | | content_json | String | No | - | -| poll_interval_ms | int | No | - | +| poll_interval_millis | int | No | - | | retry | int | No | - | | retry_backoff_multiplier_ms | int | No | 100 | | retry_backoff_max_ms | int | No | 10000 | @@ -58,7 +58,7 @@ http params http body -### poll_interval_ms [int] +### poll_interval_millis [int] request http api interval(millis) in stream mode diff --git a/docs/en/connector-v2/source/Oracle.md b/docs/en/connector-v2/source/Oracle.md new file mode 100644 index 000000000000..f191cda9d998 --- /dev/null +++ b/docs/en/connector-v2/source/Oracle.md @@ -0,0 +1,161 @@ +# Oracle + +> JDBC Oracle Source Connector + +## Support Those Engines + +> Spark
    +> Flink
    +> SeaTunnel Zeta
    + +## Key Features + +- [x] [batch](../../concept/connector-v2-features.md) +- [ ] [stream](../../concept/connector-v2-features.md) +- [x] [exactly-once](../../concept/connector-v2-features.md) +- [x] [column projection](../../concept/connector-v2-features.md) +- [x] [parallelism](../../concept/connector-v2-features.md) +- [x] [support user-defined split](../../concept/connector-v2-features.md) + +> supports query SQL and can achieve projection effect. + +## Description + +Read external data source data through JDBC. + +## Supported DataSource Info + +| Datasource | Supported versions | Driver | Url | Maven | +|------------|----------------------------------------------------------|--------------------------|----------------------------------------|--------------------------------------------------------------------| +| Oracle | Different dependency version has different driver class. | oracle.jdbc.OracleDriver | jdbc:oracle:thin:@datasource01:1523:xe | https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8 | + +## Database Dependency + +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
    +> For example Oracle datasource: cp ojdbc8-xxxxxx.jar $SEATUNNEL_HOME/lib/
    +> To support the i18n character set, copy the orai18n.jar to the $SEATUNNEL_HOME/lib/ directory. + +## Data Type Mapping + +| Oracle Data type | SeaTunnel Data type | +|--------------------------------------------------------------------------------------|---------------------| +| INTEGER | INT | +| FLOAT | DECIMAL(38, 18) | +| NUMBER(precision <= 9, scale == 0) | INT | +| NUMBER(9 < precision <= 18, scale == 0) | BIGINT | +| NUMBER(18 < precision, scale == 0) | DECIMAL(38, 0) | +| NUMBER(scale != 0) | DECIMAL(38, 18) | +| BINARY_DOUBLE | DOUBLE | +| BINARY_FLOAT
    REAL | FLOAT | +| CHAR
    NCHAR
    NVARCHAR2
    VARCHAR2
    LONG
    ROWID
    NCLOB
    CLOB
    | STRING | +| DATE | DATE | +| TIMESTAMP
    TIMESTAMP WITH LOCAL TIME ZONE | TIMESTAMP | +| BLOB
    RAW
    LONG RAW
    BFILE | BYTES | + +## Source Options + +| Name | Type | Required | Default | Description | +|------------------------------|------------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:oracle:thin:@datasource01:1523:xe | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
    if you use Oracle the value is `oracle.jdbc.OracleDriver`. | +| user | String | No | - | Connection instance user name | +| password | String | No | - | Connection instance password | +| query | String | Yes | - | Query statement | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | +| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | +| partition_lower_bound | BigDecimal | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | +| partition_upper_bound | BigDecimal | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | +| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | +| fetch_size | Int | No | 0 | For queries that return a large number of objects, you can configure
    the row fetch size used in the query to improve performance by
    reducing the number of database hits required to satisfy the selection criteria.
    Zero means use jdbc default value. | +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | + +### Tips + +> If partition_column is not set, it will run in single concurrency, and if partition_column is set, it will be executed in parallel according to the concurrency of tasks. + +## Task Example + +### Simple: + +> This example queries type_bin 'table' 16 data in your test "database" in single parallel and queries all of its fields. You can also specify which fields to query for final output to the console. + +``` +# Defining the runtime environment +env { + # You can set flink configuration here + execution.parallelism = 2 + job.mode = "BATCH" +} +source{ + Jdbc { + url = "jdbc:oracle:thin:@datasource01:1523:xe" + driver = "oracle.jdbc.OracleDriver" + user = "root" + password = "123456" + query = "SELECT * FROM TEST_TABLE" + } +} + +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/transform-v2/sql +} + +sink { + Console {} +} +``` + +### Parallel: + +> Read your query table in parallel with the shard field you configured and the shard data You can do this if you want to read the whole table + +``` +env { + execution.parallelism = 10 + job.mode = "BATCH" +} +source { + Jdbc { + url = "jdbc:oracle:thin:@datasource01:1523:xe" + driver = "oracle.jdbc.OracleDriver" + connection_check_timeout_sec = 100 + user = "root" + password = "123456" + # Define query logic as required + query = "SELECT * FROM TEST_TABLE" + # Parallel sharding reads fields + partition_column = "ID" + # Number of fragments + partition_num = 10 + } +} +sink { + Console {} +} +``` + +### Parallel Boundary: + +> It is more efficient to specify the data within the upper and lower bounds of the query It is more efficient to read your data source according to the upper and lower boundaries 
you configured + +``` +source { + Jdbc { + url = "jdbc:oracle:thin:@datasource01:1523:xe" + driver = "oracle.jdbc.OracleDriver" + connection_check_timeout_sec = 100 + user = "root" + password = "123456" + # Define query logic as required + query = "SELECT * FROM TEST_TABLE" + partition_column = "ID" + # Read start boundary + partition_lower_bound = 1 + # Read end boundary + partition_upper_bound = 500 + partition_num = 10 + } +} +``` + diff --git a/docs/en/connector-v2/source/Persistiq.md b/docs/en/connector-v2/source/Persistiq.md index e102b8b3edd6..c308efbb389c 100644 --- a/docs/en/connector-v2/source/Persistiq.md +++ b/docs/en/connector-v2/source/Persistiq.md @@ -29,7 +29,7 @@ Used to read data from Persistiq. | body | String | No | - | | json_field | Config | No | - | | content_json | String | No | - | -| poll_interval_ms | int | No | - | +| poll_interval_millis | int | No | - | | retry | int | No | - | | retry_backoff_multiplier_ms | int | No | 100 | | retry_backoff_max_ms | int | No | 10000 | @@ -56,7 +56,7 @@ http params http body -### poll_interval_ms [int] +### poll_interval_millis [int] request http api interval(millis) in stream mode diff --git a/docs/en/connector-v2/source/PostgreSQL.md b/docs/en/connector-v2/source/PostgreSQL.md index 3f9e13d2e648..63ddbc25ecf9 100644 --- a/docs/en/connector-v2/source/PostgreSQL.md +++ b/docs/en/connector-v2/source/PostgreSQL.md @@ -38,28 +38,28 @@ Read external data source data through JDBC. ## Data Type Mapping -| PostgreSQL Data type | SeaTunnel Data type | -|----------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------| -| BOOL
    | BOOLEAN | -| _BOOL
    | ARRAY<BOOLEAN> | -| BYTEA
    | BYTES | -| _BYTEA
    | ARRAY<TINYINT> | -| INT2
    SMALLSERIAL
    INT4
    SERIAL
    | INT | -| _INT2
    _INT4
    | ARRAY<INT> | -| INT8
    BIGSERIAL
    | BIGINT | -| _INT8
    | ARRAY<BIGINT> | -| FLOAT4
    | FLOAT | -| _FLOAT4
    | ARRAY<FLOAT> | -| FLOAT8
    | DOUBLE | -| _FLOAT8
    | ARRAY<DOUBLE> | -| NUMERIC(Get the designated column's specified column size>0) | DECIMAL(Get the designated column's specified column size,Gets the number of digits in the specified column to the right of the decimal point) | -| NUMERIC(Get the designated column's specified column size<0) | DECIMAL(38, 18) | -| BPCHAR
    CHARACTER
    VARCHAR
    TEXT
    GEOMETRY
    GEOGRAPHY | STRING | -| _BPCHAR
    _CHARACTER
    _VARCHAR
    _TEXT | ARRAY<STRING> | -| TIMESTAMP
    | TIMESTAMP | -| TIME
    | TIME | -| DATE
    | DATE | -| OTHER DATA TYPES | NOT SUPPORTED YET | +| PostgreSQL Data type | SeaTunnel Data type | +|-----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------| +| BOOL
    | BOOLEAN | +| _BOOL
    | ARRAY<BOOLEAN> | +| BYTEA
    | BYTES | +| _BYTEA
    | ARRAY<TINYINT> | +| INT2
    SMALLSERIAL
    INT4
    SERIAL
    | INT | +| _INT2
    _INT4
    | ARRAY<INT> | +| INT8
    BIGSERIAL
    | BIGINT | +| _INT8
    | ARRAY<BIGINT> | +| FLOAT4
    | FLOAT | +| _FLOAT4
    | ARRAY<FLOAT> | +| FLOAT8
    | DOUBLE | +| _FLOAT8
    | ARRAY<DOUBLE> | +| NUMERIC(Get the designated column's specified column size>0) | DECIMAL(Get the designated column's specified column size,Gets the number of digits in the specified column to the right of the decimal point) | +| NUMERIC(Get the designated column's specified column size<0) | DECIMAL(38, 18) | +| BPCHAR
    CHARACTER
    VARCHAR
    TEXT
    GEOMETRY
    GEOGRAPHY
    JSON
    JSONB | STRING | +| _BPCHAR
    _CHARACTER
    _VARCHAR
    _TEXT | ARRAY<STRING> | +| TIMESTAMP
    | TIMESTAMP | +| TIME
    | TIME | +| DATE
    | DATE | +| OTHER DATA TYPES | NOT SUPPORTED YET | ## Options @@ -120,6 +120,10 @@ sink { > Read your query table in parallel with the shard field you configured and the shard data You can do this if you want to read the whole table ``` +env { + execution.parallelism = 10 + job.mode = "BATCH" +} source{ jdbc{ url = "jdbc:postgresql://localhost:5432/test" @@ -131,6 +135,9 @@ source{ partition_num = 5 } } +sink { + Console {} +} ``` ### Parallel Boundary: diff --git a/docs/en/connector-v2/source/S3File.md b/docs/en/connector-v2/source/S3File.md index f7ad1cc8bd0f..54124a370382 100644 --- a/docs/en/connector-v2/source/S3File.md +++ b/docs/en/connector-v2/source/S3File.md @@ -1,22 +1,14 @@ # S3File -> S3 file source connector +> S3 File Source Connector -## Description - -Read data from aws s3 file system. - -:::tip - -If you use spark/flink, In order to use this connector, You must ensure your spark/flink cluster already integrated hadoop. The tested hadoop version is 2.x. - -If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you download and install SeaTunnel Engine. You can check the jar package under ${SEATUNNEL_HOME}/lib to confirm this. +## Support Those Engines -To use this connector you need put hadoop-aws-3.1.4.jar and aws-java-sdk-bundle-1.11.271.jar in ${SEATUNNEL_HOME}/lib dir. +> Spark
    +> Flink
    +> SeaTunnel Zeta
    -::: - -## Key features +## Key Features - [x] [batch](../../concept/connector-v2-features.md) - [ ] [stream](../../concept/connector-v2-features.md) @@ -35,104 +27,31 @@ Read all the data in a split in a pollNext call. What splits are read will be sa - [x] json - [x] excel -## Options - -| name | type | required | default value | -|---------------------------------|---------|----------|-------------------------------------------------------| -| path | string | yes | - | -| file_format_type | string | yes | - | -| bucket | string | yes | - | -| fs.s3a.endpoint | string | yes | - | -| fs.s3a.aws.credentials.provider | string | yes | com.amazonaws.auth.InstanceProfileCredentialsProvider | -| read_columns | list | no | - | -| access_key | string | no | - | -| access_secret | string | no | - | -| hadoop_s3_properties | map | no | - | -| delimiter | string | no | \001 | -| parse_partition_from_path | boolean | no | true | -| date_format | string | no | yyyy-MM-dd | -| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | -| time_format | string | no | HH:mm:ss | -| skip_header_row_number | long | no | 0 | -| schema | config | no | - | -| common-options | | no | - | -| sheet_name | string | no | - | -| file_filter_pattern | string | no | - | - -### path [string] - -The source file path. - -### fs.s3a.endpoint [string] - -fs s3a endpoint - -### fs.s3a.aws.credentials.provider [string] - -The way to authenticate s3a. We only support `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` and `com.amazonaws.auth.InstanceProfileCredentialsProvider` now. 
- -More information about the credential provider you can see [Hadoop AWS Document](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html#Simple_name.2Fsecret_credentials_with_SimpleAWSCredentialsProvider.2A) - -### delimiter [string] - -Field delimiter, used to tell connector how to slice and dice fields when reading text files - -default `\001`, the same as hive's default delimiter - -### parse_partition_from_path [boolean] - -Control whether parse the partition keys and values from file path - -For example if you read a file from path `s3n://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26` - -Every record data from file will be added these two fields: - -| name | age | -|---------------|-----| -| tyrantlucifer | 26 | - -Tips: **Do not define partition fields in schema option** - -### date_format [string] - -Date type format, used to tell connector how to convert string to date, supported as the following formats: - -`yyyy-MM-dd` `yyyy.MM.dd` `yyyy/MM/dd` - -default `yyyy-MM-dd` - -### datetime_format [string] - -Datetime type format, used to tell connector how to convert string to datetime, supported as the following formats: - -`yyyy-MM-dd HH:mm:ss` `yyyy.MM.dd HH:mm:ss` `yyyy/MM/dd HH:mm:ss` `yyyyMMddHHmmss` - -default `yyyy-MM-dd HH:mm:ss` - -### time_format [string] - -Time type format, used to tell connector how to convert string to time, supported as the following formats: - -`HH:mm:ss` `HH:mm:ss.SSS` - -default `HH:mm:ss` +## Description -### skip_header_row_number [long] +Read data from aws s3 file system. -Skip the first few lines, but only for the txt and csv. 
+## Supported DataSource Info -For example, set like following: +| Datasource | Supported versions | +|------------|--------------------| +| S3 | current | -`skip_header_row_number = 2` +## Dependency -then SeaTunnel will skip the first 2 lines from source files +> If you use spark/flink, In order to use this connector, You must ensure your spark/flink cluster already integrated hadoop. The tested hadoop version is 2.x.
    +> +> If you use SeaTunnel Zeta, It automatically integrated the hadoop jar when you download and install SeaTunnel Zeta. You can check the jar package under ${SEATUNNEL_HOME}/lib to confirm this.
    +> To use this connector you need put hadoop-aws-3.1.4.jar and aws-java-sdk-bundle-1.11.271.jar in ${SEATUNNEL_HOME}/lib dir. -### file_format_type [string] +## Data Type Mapping -File type, supported as the following file types: +Data type mapping is related to the type of file being read, We supported as the following file types: `text` `csv` `parquet` `orc` `json` `excel` +### JSON File Type + If you assign file type to `json`, you should also assign schema option to tell connector how to parse data to the row you want. For example: @@ -174,7 +93,7 @@ connector will generate data as the following: |------|-------------|---------| | 200 | get success | true | -If you assign file type to `parquet` `orc`, schema option not required, connector can find the schema of upstream data automatically. +### Text Or CSV File Type If you assign file type to `text` `csv`, you can choose to specify the schema information or not. @@ -215,61 +134,102 @@ connector will generate data as the following: |---------------|-----|--------| | tyrantlucifer | 26 | male | -### bucket [string] - -The bucket address of s3 file system, for example: `s3n://seatunnel-test`, if you use `s3a` protocol, this parameter should be `s3a://seatunnel-test`. - -### access_key [string] - -The access key of s3 file system. If this parameter is not set, please confirm that the credential provider chain can be authenticated correctly, you could check this [hadoop-aws](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) - -### access_secret [string] - -The access secret of s3 file system. 
If this parameter is not set, please confirm that the credential provider chain can be authenticated correctly, you could check this [hadoop-aws](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) +### Orc File Type -### hadoop_s3_properties [map] - -If you need to add a other option, you could add it here and refer to this [hadoop-aws](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) - -``` -hadoop_s3_properties { - "xxx" = "xxx" - } -``` - -### schema [config] - -#### fields [Config] - -The schema of upstream data. - -### read_columns [list] - -The read column list of the data source, user can use it to implement field projection. - -The file type supported column projection as the following shown: - -- text -- json -- csv -- orc -- parquet -- excel +If you assign file type to `parquet` `orc`, schema option not required, connector can find the schema of upstream data automatically. -**Tips: If the user wants to use this feature when reading `text` `json` `csv` files, the schema option must be configured** +| Orc Data type | SeaTunnel Data type | +|----------------------------------|----------------------------------------------------------------| +| BOOLEAN | BOOLEAN | +| INT | INT | +| BYTE | BYTE | +| SHORT | SHORT | +| LONG | LONG | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| BINARY | BINARY | +| STRING
    VARCHAR
    CHAR
    | STRING | +| DATE | LOCAL_DATE_TYPE | +| TIMESTAMP | LOCAL_DATE_TIME_TYPE | +| DECIMAL | DECIMAL | +| LIST(STRING) | STRING_ARRAY_TYPE | +| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE | +| LIST(TINYINT) | BYTE_ARRAY_TYPE | +| LIST(SMALLINT) | SHORT_ARRAY_TYPE | +| LIST(INT) | INT_ARRAY_TYPE | +| LIST(BIGINT) | LONG_ARRAY_TYPE | +| LIST(FLOAT) | FLOAT_ARRAY_TYPE | +| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE | +| Map | MapType, This type of K and V will transform to SeaTunnel type | +| STRUCT | SeaTunnelRowType | + +### Parquet File Type -### common options +If you assign file type to `parquet` `orc`, schema option not required, connector can find the schema of upstream data automatically. -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. +| Orc Data type | SeaTunnel Data type | +|----------------------|----------------------------------------------------------------| +| INT_8 | BYTE | +| INT_16 | SHORT | +| DATE | DATE | +| TIMESTAMP_MILLIS | TIMESTAMP | +| INT64 | LONG | +| INT96 | TIMESTAMP | +| BINARY | BYTES | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| BOOLEAN | BOOLEAN | +| FIXED_LEN_BYTE_ARRAY | TIMESTAMP
    DECIMAL | +| DECIMAL | DECIMAL | +| LIST(STRING) | STRING_ARRAY_TYPE | +| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE | +| LIST(TINYINT) | BYTE_ARRAY_TYPE | +| LIST(SMALLINT) | SHORT_ARRAY_TYPE | +| LIST(INT) | INT_ARRAY_TYPE | +| LIST(BIGINT) | LONG_ARRAY_TYPE | +| LIST(FLOAT) | FLOAT_ARRAY_TYPE | +| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE | +| Map | MapType, This type of K and V will transform to SeaTunnel type | +| STRUCT | SeaTunnelRowType | -### sheet_name [string] +## Options -Reader the sheet of the workbook,Only used when file_format_type is excel. +| name | type | required | default value | Description | +|---------------------------------|---------|----------|-------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| path | string | yes | - | The s3 path that needs to be read can have sub paths, but the sub paths need to meet certain format requirements. Specific requirements can be referred to "parse_partition_from_path" option | +| file_format_type | string | yes | - | File type, supported as the following file types: `text` `csv` `parquet` `orc` `json` `excel` | +| bucket | string | yes | - | The bucket address of s3 file system, for example: `s3n://seatunnel-test`, if you use `s3a` protocol, this parameter should be `s3a://seatunnel-test`. | +| fs.s3a.endpoint | string | yes | - | fs s3a endpoint | +| fs.s3a.aws.credentials.provider | string | yes | com.amazonaws.auth.InstanceProfileCredentialsProvider | The way to authenticate s3a. We only support `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` and `com.amazonaws.auth.InstanceProfileCredentialsProvider` now. 
More information about the credential provider you can see [Hadoop AWS Document](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html#Simple_name.2Fsecret_credentials_with_SimpleAWSCredentialsProvider.2A) | +| read_columns | list | no | - | The read column list of the data source, user can use it to implement field projection. The file type supported column projection as the following shown: `text` `csv` `parquet` `orc` `json` `excel` . If the user wants to use this feature when reading `text` `json` `csv` files, the "schema" option must be configured. | +| access_key | string | no | - | Only used when `fs.s3a.aws.credentials.provider = org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider ` | +| access_secret | string | no | - | Only used when `fs.s3a.aws.credentials.provider = org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider ` | +| hadoop_s3_properties | map | no | - | If you need to add other option, you could add it here and refer to this [link](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) | +| delimiter | string | no | \001 | Field delimiter, used to tell connector how to slice and dice fields when reading text files. Default `\001`, the same as hive's default delimiter. | +| parse_partition_from_path | boolean | no | true | Control whether parse the partition keys and values from file path. For example if you read a file from path `s3n://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26`. Every record data from file will be added these two fields: name="tyrantlucifer", age=26 | +| date_format | string | no | yyyy-MM-dd | Date type format, used to tell connector how to convert string to date, supported as the following formats:`yyyy-MM-dd` `yyyy.MM.dd` `yyyy/MM/dd`.
default `yyyy-MM-dd` | +| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | Datetime type format, used to tell connector how to convert string to datetime, supported as the following formats:`yyyy-MM-dd HH:mm:ss` `yyyy.MM.dd HH:mm:ss` `yyyy/MM/dd HH:mm:ss` `yyyyMMddHHmmss` | +| time_format | string | no | HH:mm:ss | Time type format, used to tell connector how to convert string to time, supported as the following formats:`HH:mm:ss` `HH:mm:ss.SSS` | +| skip_header_row_number | long | no | 0 | Skip the first few lines, but only for the txt and csv. For example, set like following:`skip_header_row_number = 2`. Then SeaTunnel will skip the first 2 lines from source files | +| schema | config | no | - | The schema of upstream data. | +| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. | +| sheet_name | string | no | - | Reader the sheet of the workbook,Only used when file_format is excel. | ## Example -```hocon +1. In this example, We read data from s3 path `s3a://seatunnel-test/seatunnel/text` and the file type is orc in this path. + We use `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` to authentication so `access_key` and `secret_key` is required. + All columns in the file will be read and send to sink. + +``` +# Defining the runtime environment +env { + # You can set flink configuration here + execution.parallelism = 1 + job.mode = "BATCH" +} +source { S3File { path = "/seatunnel/text" fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" @@ -279,9 +239,21 @@ Reader the sheet of the workbook,Only used when file_format_type is excel. bucket = "s3a://seatunnel-test" file_format_type = "orc" } +} + +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/category/transform-v2 +} +sink { + Console {} +} ``` +2. 
Use `InstanceProfileCredentialsProvider` to authentication + The file type in S3 is json, so need config schema option. + ```hocon S3File { @@ -300,9 +272,47 @@ Reader the sheet of the workbook,Only used when file_format_type is excel. ``` -### file_filter_pattern [string] +3. Use `InstanceProfileCredentialsProvider` to authentication + The file type in S3 is json and has five fields (`id`, `name`, `age`, `sex`, `type`), so need config schema option. + In this job, we only need send `id` and `name` column to mysql. -Filter pattern, which used for filtering files. +``` +# Defining the runtime environment +env { + # You can set flink configuration here + execution.parallelism = 1 + job.mode = "BATCH" +} + +source { + S3File { + path = "/seatunnel/json" + bucket = "s3a://seatunnel-test" + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider="com.amazonaws.auth.InstanceProfileCredentialsProvider" + file_format_type = "json" + read_columns = ["id", "name"] + schema { + fields { + id = int + name = string + age = int + sex = int + type = string + } + } + } +} + +transform { + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/category/transform-v2 +} + +sink { + Console {} +} +``` ## Changelog diff --git a/docs/en/connector-v2/source/Snowflake.md b/docs/en/connector-v2/source/Snowflake.md index cd824eab4635..a7835013d58a 100644 --- a/docs/en/connector-v2/source/Snowflake.md +++ b/docs/en/connector-v2/source/Snowflake.md @@ -56,20 +56,20 @@ Read external data source data through JDBC. 
## Options -| name | type | required | default | description | -|------------------------------|--------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:snowflake://.snowflakecomputing.com | -| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
    if you use Snowflake the value is `net.snowflake.client.jdbc.SnowflakeDriver`. | -| user | String | No | - | Connection instance user name | -| password | String | No | - | Connection instance password | -| query | String | Yes | - | Query statement | -| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | -| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | -| partition_lower_bound | Long | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | -| partition_upper_bound | Long | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | -| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | -| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
    the row fetch size used in the query toimprove performance by
    reducing the number database hits required to satisfy the selection criteria.
    Zero means use jdbc default value. | -| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | +| name | type | required | default | description | +|------------------------------|------------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:snowflake://.snowflakecomputing.com | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
    if you use Snowflake the value is `net.snowflake.client.jdbc.SnowflakeDriver`. | +| user | String | No | - | Connection instance user name | +| password | String | No | - | Connection instance password | +| query | String | Yes | - | Query statement | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | +| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | +| partition_lower_bound | BigDecimal | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | +| partition_upper_bound | BigDecimal | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | +| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | +| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
    the row fetch size used in the query to improve performance by
    reducing the number of database hits required to satisfy the selection criteria.
    Zero means use jdbc default value. | +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | ## tips diff --git a/docs/en/connector-v2/source/Vertica.md b/docs/en/connector-v2/source/Vertica.md index 66f18e7a4ed4..df387ac30bf0 100644 --- a/docs/en/connector-v2/source/Vertica.md +++ b/docs/en/connector-v2/source/Vertica.md @@ -56,20 +56,20 @@ Read external data source data through JDBC. ## Source Options -| Name | Type | Required | Default | Description | -|------------------------------|--------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:vertica://localhost:5433/vertica | -| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
    if you use Vertica the value is `com.vertica.jdbc.Driver`. | -| user | String | No | - | Connection instance user name | -| password | String | No | - | Connection instance password | -| query | String | Yes | - | Query statement | -| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | -| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | -| partition_lower_bound | Long | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | -| partition_upper_bound | Long | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | -| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | -| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
    the row fetch size used in the query toimprove performance by
    reducing the number database hits required to satisfy the selection criteria.
    Zero means use jdbc default value. | -| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | +| Name | Type | Required | Default | Description | +|------------------------------|------------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:vertica://localhost:5433/vertica | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
    if you use Vertica the value is `com.vertica.jdbc.Driver`. | +| user | String | No | - | Connection instance user name | +| password | String | No | - | Connection instance password | +| query | String | Yes | - | Query statement | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | +| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | +| partition_lower_bound | BigDecimal | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | +| partition_upper_bound | BigDecimal | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | +| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | +| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
    the row fetch size used in the query to improve performance by
    reducing the number of database hits required to satisfy the selection criteria.
    Zero means use jdbc default value. | +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | ### Tips diff --git a/docs/en/seatunnel-engine/checkpoint-storage.md b/docs/en/seatunnel-engine/checkpoint-storage.md index a88f301439e4..f2a6487f28d2 100644 --- a/docs/en/seatunnel-engine/checkpoint-storage.md +++ b/docs/en/seatunnel-engine/checkpoint-storage.md @@ -59,8 +59,6 @@ seatunnel: checkpoint: interval: 6000 timeout: 7000 - max-concurrent: 1 - tolerable-failure: 2 storage: type: hdfs max-retained: 3 @@ -94,8 +92,6 @@ seatunnel: checkpoint: interval: 6000 timeout: 7000 - max-concurrent: 1 - tolerable-failure: 2 storage: type: hdfs max-retained: 3 @@ -119,8 +115,6 @@ seatunnel: checkpoint: interval: 6000 timeout: 7000 - max-concurrent: 1 - tolerable-failure: 2 storage: type: hdfs max-retained: 3 @@ -152,6 +146,28 @@ seatunnel: kerberosKeytab: your-kerberos-keytab ``` +If HDFS is in HA mode, you can configure it like this: + +```yaml +seatunnel: + engine: + checkpoint: + storage: + type: hdfs + max-retained: 3 + plugin-config: + storage.type: hdfs + fs.defaultFS: hdfs://usdp-bing + seatunnel.hadoop.dfs.nameservices: usdp-bing + seatunnel.hadoop.dfs.ha.namenodes.usdp-bing: nn1,nn2 + seatunnel.hadoop.dfs.namenode.rpc-address.usdp-bing.nn1: usdp-bing-nn1:8020 + seatunnel.hadoop.dfs.namenode.rpc-address.usdp-bing.nn2: usdp-bing-nn2:8020 + seatunnel.hadoop.dfs.client.failover.proxy.provider.usdp-bing: org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider + +``` + +If HDFS has other settings in `hdfs-site.xml` or `core-site.xml`, set them here by adding the `seatunnel.hadoop.` prefix to each key. 
+ #### LocalFile ```yaml @@ -160,8 +176,6 @@ seatunnel: checkpoint: interval: 6000 timeout: 7000 - max-concurrent: 1 - tolerable-failure: 2 storage: type: hdfs max-retained: 3 diff --git a/docs/en/seatunnel-engine/deployment.md b/docs/en/seatunnel-engine/deployment.md index c07cd45d6b1a..18c1a587a2a3 100644 --- a/docs/en/seatunnel-engine/deployment.md +++ b/docs/en/seatunnel-engine/deployment.md @@ -75,14 +75,6 @@ The interval between two checkpoints, unit is milliseconds. If the `checkpoint.i The timeout of a checkpoint. If a checkpoint cannot be completed within the timeout period, a checkpoint failure will be triggered. Therefore, Job will be restored. -**max-concurrent** - -How many checkpoints can be performed simultaneously at most. - -**tolerable-failure** - -Maximum number of retries after checkpoint failure. - Example ``` @@ -95,14 +87,24 @@ seatunnel: checkpoint: interval: 300000 timeout: 10000 - max-concurrent: 1 - tolerable-failure: 2 ``` **checkpoint storage** About the checkpoint storage, you can see [checkpoint storage](checkpoint-storage.md) +### 4.4 Historical Job expiration Config + +The information about each completed Job, such as status, counters, and error logs, is stored in the IMap object. As the number of running jobs increases, the memory increases and eventually the memory will overflow. Therefore, you can adjust the history-job-expire-minutes parameter to solve this problem. The time unit of this parameter is minute. The default value is 1440 minutes, that is, one day. + +Example + +``` +seatunnel: + engine: + history-job-expire-minutes: 1440 +``` + ## 5. Config SeaTunnel Engine Server All SeaTunnel Engine Server config in `hazelcast.yaml` file. 
diff --git a/docs/en/seatunnel-engine/rest-api.md b/docs/en/seatunnel-engine/rest-api.md index 2edec3496adb..2f44421a3d60 100644 --- a/docs/en/seatunnel-engine/rest-api.md +++ b/docs/en/seatunnel-engine/rest-api.md @@ -180,3 +180,61 @@ network: ------------------------------------------------------------------------------------------ +### Submit Job. + +
    +POST /hazelcast/rest/maps/submit-job (Returns jobId and jobName if job submitted successfully.) + +#### Parameters + +> | name | type | data type | description | +> |----------------------|----------|-----------|-----------------------------------| +> | jobId | optional | string | job id | +> | jobName | optional | string | job name | +> | isStartWithSavePoint | optional | string | if job is started with save point | + +#### Body + +```json +{ + "env": { + "job.mode": "batch" + }, + "source": [ + { + "plugin_name": "FakeSource", + "result_table_name": "fake", + "row.num": 100, + "schema": { + "fields": { + "name": "string", + "age": "int", + "card": "int" + } + } + } + ], + "transform": [ + ], + "sink": [ + { + "plugin_name": "Console", + "source_table_name": ["fake"] + } + ] +} +``` + +#### Responses + +```json +{ + "jobId": 733584788375666689, + "jobName": "rest_api_test" +} +``` + +
    + +------------------------------------------------------------------------------------------ + diff --git a/docs/en/start-v2/locally/deployment.md b/docs/en/start-v2/locally/deployment.md index 6f82a9d84890..1e5c0f9ed620 100644 --- a/docs/en/start-v2/locally/deployment.md +++ b/docs/en/start-v2/locally/deployment.md @@ -23,8 +23,8 @@ Or you can download it by terminal ```shell export version="2.3.2" -wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-incubating-${version}-bin.tar.gz" -tar -xzvf "apache-seatunnel-incubating-${version}-bin.tar.gz" +wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz" +tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" ``` diff --git a/docs/en/start-v2/locally/quick-start-flink.md b/docs/en/start-v2/locally/quick-start-flink.md index cf01a0fccdb0..9fd9f1eb43d5 100644 --- a/docs/en/start-v2/locally/quick-start-flink.md +++ b/docs/en/start-v2/locally/quick-start-flink.md @@ -68,14 +68,14 @@ You could start the application by the following commands flink version between `1.12.x` and `1.14.x` ```shell -cd "apache-seatunnel-incubating-${version}" +cd "apache-seatunnel-${version}" ./bin/start-seatunnel-flink-13-connector-v2.sh --config ./config/v2.streaming.conf.template ``` flink version between `1.15.x` and `1.16.x` ```shell -cd "apache-seatunnel-incubating-${version}" +cd "apache-seatunnel-${version}" ./bin/start-seatunnel-flink-15-connector-v2.sh --config ./config/v2.streaming.conf.template ``` diff --git a/docs/en/start-v2/locally/quick-start-seatunnel-engine.md b/docs/en/start-v2/locally/quick-start-seatunnel-engine.md index db998897027f..f469c570e3a6 100644 --- a/docs/en/start-v2/locally/quick-start-seatunnel-engine.md +++ b/docs/en/start-v2/locally/quick-start-seatunnel-engine.md @@ -59,7 +59,7 @@ More information about config please check [config concept](../../concept/config You could start the application by the following commands ```shell -cd 
"apache-seatunnel-incubating-${version}" +cd "apache-seatunnel-${version}" ./bin/seatunnel.sh --config ./config/v2.batch.config.template -e local ``` diff --git a/docs/en/start-v2/locally/quick-start-spark.md b/docs/en/start-v2/locally/quick-start-spark.md index 88aebd5aa439..903217c8ec14 100644 --- a/docs/en/start-v2/locally/quick-start-spark.md +++ b/docs/en/start-v2/locally/quick-start-spark.md @@ -69,7 +69,7 @@ You could start the application by the following commands spark 2.4.x ```bash -cd "apache-seatunnel-incubating-${version}" +cd "apache-seatunnel-${version}" ./bin/start-seatunnel-spark-2-connector-v2.sh \ --master local[4] \ --deploy-mode client \ @@ -79,7 +79,7 @@ cd "apache-seatunnel-incubating-${version}" spark3.x.x ```shell -cd "apache-seatunnel-incubating-${version}" +cd "apache-seatunnel-${version}" ./bin/start-seatunnel-spark-3-connector-v2.sh \ --master local[4] \ --deploy-mode client \ diff --git a/docs/en/transform-v2/sql-udf.md b/docs/en/transform-v2/sql-udf.md index 143044f5a797..ede3ef9ab4a8 100644 --- a/docs/en/transform-v2/sql-udf.md +++ b/docs/en/transform-v2/sql-udf.md @@ -39,16 +39,31 @@ public interface ZetaUDF { ## UDF Implements Example -Add the dependency of transform-v2 and provided scope to your maven project: +Add these dependencies and provided scope to your maven project: ```xml - - org.apache.seatunnel - seatunnel-transforms-v2 - 2.3.x - provided - + + + org.apache.seatunnel + seatunnel-transforms-v2 + 2.3.2 + provided + + + org.apache.seatunnel + seatunnel-api + 2.3.2 + provided + + + com.google.auto.service + auto-service + 1.0.1 + provided + + + ``` Add a Java Class implements of ZetaUDF like this: diff --git a/pom.xml b/pom.xml index 3d619644952b..224a339fc6e4 100644 --- a/pom.xml +++ b/pom.xml @@ -768,6 +768,9 @@ ${spotless.version} + + src/main/java/org/apache/seatunnel/antlr4/generated/*.* + 1.7 diff --git a/release-note.md b/release-note.md index 9ade9c614309..61664d773f48 100644 --- a/release-note.md +++ 
b/release-note.md @@ -15,6 +15,7 @@ ### Connectors - [Elasticsearch] Support https protocol & compatible with opensearch - [Hbase] Add hbase sink connector #4049 +- [Clickhouse] Fix clickhouse old version compatibility #5326 ### Formats - [Canal]Support read canal format message #3950 - [Debezium]Support debezium canal format message #3981 @@ -81,9 +82,12 @@ - [E2E] [Kafka] Fix kafka e2e testcase (#4520) - [Container Version] Fix risk of unreproducible test cases #4591 - [E2e] [Mysql-cdc] Removing the excess MySqlIncrementalSourceIT e2e reduces the CI time (#4738) +- [E2E] [Common] Update test container version of seatunnel engine (#5323) ## Improve +- [Improve][Connector-V2][Jdbc-Source] Support for Decimal types as split keys (#4634) + ### Core - [Core] [Spark] Push transform operation from Spark Driver to Executors (#4503) @@ -113,6 +117,7 @@ ### CI - [CI] Fix error repository name in ci config files (#4795) +- [CI][E2E][Zeta] Increase Zeta checkpoint timeout to avoid frequent connector-file-sftp-e2e failures (#5339) ### Zeta(ST-Engine) @@ -154,6 +159,7 @@ - [Connector-V2] [Paimon] Introduce paimon connector (#4178) - [Connector V2] [Cassandra] Expose configurable options in Cassandra (#3681) - [Connector V2] [Jdbc] Supports GEOMETRY data type for PostgreSQL (#4673) +- [Connector V2] [Jdbc] Supports Kingbase database (#4803) - [Transform-V2] Add UDF SPI and an example implement for SQL Transform plugin (#4392) - [Transform-V2] Support copy field list (#4404) - [Transform-V2] Add support CatalogTable for FieldMapperTransform (#4423) @@ -190,3 +196,4 @@ - [Docs] Redshift add defaultRowFetchSize (#4616) - [Docs] Refactor connector-v2 docs using unified format Mysql (#4590) - [Docs] Add Value types in Java to Schema features (#5087) +- [Docs] Replace username by user in the options of FtpFile (#5421) \ No newline at end of file diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/metrics/JobMetrics.java
b/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/metrics/JobMetrics.java index 0149ad26497f..d39e8b96640e 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/metrics/JobMetrics.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/metrics/JobMetrics.java @@ -26,9 +26,11 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -72,6 +74,10 @@ public JobMetrics merge(JobMetrics jobMetrics) { } Map> metricsMap = new HashMap<>(); metrics.forEach((key, value) -> metricsMap.put(key, new ArrayList<>(value))); + //// Because if a job is restarted, the running node might change, so we need to remove the + // node information. + Set keysToExclude = + new HashSet<>(Arrays.asList(MetricTags.MEMBER, MetricTags.ADDRESS)); jobMetrics.metrics.forEach( (key, value) -> metricsMap.merge( @@ -82,7 +88,11 @@ public JobMetrics merge(JobMetrics jobMetrics) { for (Measurement m1 : v1) { if (v2.stream() .noneMatch( - m2 -> m2.getTags().equals(m1.getTags()))) { + m2 -> + areMapsEqualExcludingKeys( + m2.getTags(), + m1.getTags(), + keysToExclude))) { ms.add(m1); } } @@ -91,6 +101,40 @@ public JobMetrics merge(JobMetrics jobMetrics) { return new JobMetrics(metricsMap); } + /** + * Compares two Map objects excluding certain keys. 
+ * + * @param map1 the first map + * @param map2 the second map + * @param keysToExclude the keys to be excluded during comparison + * @return true if the maps are equal excluding the specific keys, false otherwise + */ + public static boolean areMapsEqualExcludingKeys( + Map map1, Map map2, Set keysToExclude) { + // Return false if either of the maps is null + if (map1 == null || map2 == null) { + return false; + } + + // Return false if the sizes of the maps are different + if (map1.size() != map2.size()) { + return false; + } + + // Create copies of the maps to avoid modifying the original maps + Map map1Copy = new HashMap<>(map1); + Map map2Copy = new HashMap<>(map2); + + // Remove specific keys from the copies + for (String key : keysToExclude) { + map1Copy.remove(key); + map2Copy.remove(key); + } + + // Return whether the copies are equal + return map1Copy.equals(map2Copy); + } + /** Returns all metrics present. */ public Set metrics() { return Collections.unmodifiableSet(metrics.keySet()); diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/metrics/ThreadSafeQPSMeter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/metrics/ThreadSafeQPSMeter.java index 627e9bd4ca7e..11a4376d7cd7 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/metrics/ThreadSafeQPSMeter.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/metrics/ThreadSafeQPSMeter.java @@ -48,7 +48,6 @@ public void markEvent(long n) { VOLATILE_VALUE_UPDATER.addAndGet(this, n); } - @SuppressWarnings("checkstyle:MagicNumber") @Override public double getRate() { long cost = System.currentTimeMillis() - timestamp; diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/Options.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/Options.java index a4ce408d73b0..72057aef5f52 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/Options.java +++ 
b/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/Options.java @@ -30,7 +30,7 @@ import java.util.List; import java.util.Map; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; public class Options { diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvCommonOptions.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvCommonOptions.java index bc80c6642889..d076cd5367bf 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvCommonOptions.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvCommonOptions.java @@ -51,6 +51,12 @@ public interface EnvCommonOptions { .withDescription( "The interval (in milliseconds) between two consecutive checkpoints."); + Option CHECKPOINT_TIMEOUT = + Options.key("checkpoint.timeout") + .longType() + .noDefaultValue() + .withDescription("The timeout (in milliseconds) for a checkpoint."); + Option JARS = Options.key("jars") .stringType() diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvOptionRule.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvOptionRule.java index 3a90b82e83bc..09310f080c53 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvOptionRule.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvOptionRule.java @@ -30,6 +30,7 @@ public static OptionRule getEnvOptionRules() { CommonOptions.PARALLELISM, EnvCommonOptions.JARS, EnvCommonOptions.CHECKPOINT_INTERVAL, + EnvCommonOptions.CHECKPOINT_TIMEOUT, EnvCommonOptions.CUSTOM_PARAMETERS) .build(); } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/serialization/DefaultSerializer.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/serialization/DefaultSerializer.java index 2100b9529cdc..5fabe2a284a9 100644 --- 
a/seatunnel-api/src/main/java/org/apache/seatunnel/api/serialization/DefaultSerializer.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/serialization/DefaultSerializer.java @@ -35,6 +35,9 @@ public byte[] serialize(T obj) throws IOException { @Override public T deserialize(byte[] serialized) throws IOException { + if (serialized == null) { + return null; + } return SerializationUtils.deserialize(serialized); } } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java index c0fbe2c0299c..3b1e715ebee7 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java @@ -18,6 +18,7 @@ package org.apache.seatunnel.api.sink; import org.apache.seatunnel.api.common.metrics.MetricsContext; +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import java.io.IOException; import java.io.Serializable; @@ -44,6 +45,14 @@ public interface SinkWriter { */ void write(T element) throws IOException; + /** + * apply schema change to third party data receiver. + * + * @param event + * @throws IOException + */ + default void applySchemaChange(SchemaChangeEvent event) throws IOException {} + /** * prepare the commit, will be called before {@link #snapshotState(long checkpointId)}. 
If you * need to use 2pc, you can return the commit info in this method, and receive the commit info diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/Collector.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/Collector.java index 0b924bb570a9..85435880c63b 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/Collector.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/Collector.java @@ -17,6 +17,8 @@ package org.apache.seatunnel.api.source; +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; + /** * A {@link Collector} is used to collect data from {@link SourceReader}. * @@ -26,6 +28,12 @@ public interface Collector { void collect(T record); + default void markSchemaChangeBeforeCheckpoint() {} + + default void collect(SchemaChangeEvent event) {} + + default void markSchemaChangeAfterCheckpoint() {} + /** * Returns the checkpoint lock. * diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java index 7b2dd6d5533a..358e873b9915 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java @@ -22,6 +22,8 @@ import lombok.RequiredArgsConstructor; import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; @Getter @EqualsAndHashCode @@ -54,14 +56,15 @@ public static TablePath of(String databaseName, String schemaName, String tableN } public String getSchemaAndTableName() { - return String.format("%s.%s", schemaName, tableName); + return getNameCommon(null, schemaName, tableName, null, null); + } + + public String getSchemaAndTableName(String quote) { + return getNameCommon(null, schemaName, tableName, quote, quote); } public String getFullName() { - if (schemaName == null) { - return String.format("%s.%s", 
databaseName, tableName); - } - return String.format("%s.%s.%s", databaseName, schemaName, tableName); + return getNameCommon(databaseName, schemaName, tableName, null, null); } public String getFullNameWithQuoted() { @@ -69,13 +72,36 @@ public String getFullNameWithQuoted() { } public String getFullNameWithQuoted(String quote) { - if (schemaName == null) { - return String.format( - "%s%s%s.%s%s%s", quote, databaseName, quote, quote, tableName, quote); + return getNameCommon(databaseName, schemaName, tableName, quote, quote); + } + + public String getFullNameWithQuoted(String quoteLeft, String quoteRight) { + return getNameCommon(databaseName, schemaName, tableName, quoteLeft, quoteRight); + } + + private String getNameCommon( + String databaseName, + String schemaName, + String tableName, + String quoteLeft, + String quoteRight) { + List joinList = new ArrayList<>(); + quoteLeft = quoteLeft == null ? "" : quoteLeft; + quoteRight = quoteRight == null ? "" : quoteRight; + + if (databaseName != null) { + joinList.add(quoteLeft + databaseName + quoteRight); + } + + if (schemaName != null) { + joinList.add(quoteLeft + schemaName + quoteRight); } - return String.format( - "%s%s%s.%s%s%s.%s%s%s", - quote, databaseName, quote, quote, schemaName, quote, quote, tableName, quote); + + if (tableName != null) { + joinList.add(quoteLeft + tableName + quoteRight); + } + + return String.join(".", joinList); } @Override diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableAddColumnEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableAddColumnEvent.java new file mode 100644 index 000000000000..967452545265 --- /dev/null +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableAddColumnEvent.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.api.table.event; + +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.TablePath; + +import lombok.Getter; +import lombok.ToString; + +@Getter +@ToString(callSuper = true) +public class AlterTableAddColumnEvent extends AlterTableColumnEvent { + private final Column column; + private final boolean first; + private final String afterColumn; + + public AlterTableAddColumnEvent( + TablePath tablePath, Column column, boolean first, String afterColumn) { + super(tablePath); + this.column = column; + this.first = first; + this.afterColumn = afterColumn; + } + + public static AlterTableAddColumnEvent addFirst(TablePath tablePath, Column column) { + return new AlterTableAddColumnEvent(tablePath, column, true, null); + } + + public static AlterTableAddColumnEvent add(TablePath tablePath, Column column) { + return new AlterTableAddColumnEvent(tablePath, column, false, null); + } + + public static AlterTableAddColumnEvent addAfter( + TablePath tablePath, Column column, String afterColumn) { + return new AlterTableAddColumnEvent(tablePath, column, false, afterColumn); + } +} diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableChangeColumnEvent.java 
b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableChangeColumnEvent.java new file mode 100644 index 000000000000..2a8ba71846c3 --- /dev/null +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableChangeColumnEvent.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.seatunnel.api.table.event;
+
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.Getter;
+import lombok.ToString;
+
+/**
+ * Schema-change event that replaces the column named {@code oldColumn} with a new column
+ * definition.
+ *
+ * <p>The new definition and the positional hints ({@code first}, {@code afterColumn}) are
+ * inherited from {@link AlterTableAddColumnEvent}. Because this type is a subclass of the add
+ * event, code that dispatches with {@code instanceof} has to test for this type before testing
+ * for {@link AlterTableAddColumnEvent}.
+ */
+@Getter
+@ToString(callSuper = true)
+public class AlterTableChangeColumnEvent extends AlterTableAddColumnEvent {
+    // Name of the existing column that is being replaced.
+    private final String oldColumn;
+
+    /**
+     * @param tablePath table the event applies to
+     * @param oldColumn name of the existing column
+     * @param column new column definition
+     * @param first whether the column is requested at the first position
+     * @param afterColumn name of the column to place the changed column after, or {@code null}
+     */
+    public AlterTableChangeColumnEvent(
+            TablePath tablePath,
+            String oldColumn,
+            Column column,
+            boolean first,
+            String afterColumn) {
+        super(tablePath, column, first, afterColumn);
+        this.oldColumn = oldColumn;
+    }
+
+    /** Creates an event with {@code first = true} and no {@code afterColumn}. */
+    public static AlterTableChangeColumnEvent changeFirst(
+            TablePath tablePath, String oldColumn, Column column) {
+        return new AlterTableChangeColumnEvent(tablePath, oldColumn, column, true, null);
+    }
+
+    /** Creates an event with {@code first = false} and no {@code afterColumn}. */
+    public static AlterTableChangeColumnEvent change(
+            TablePath tablePath, String oldColumn, Column column) {
+        return new AlterTableChangeColumnEvent(tablePath, oldColumn, column, false, null);
+    }
+
+    /** Creates an event with {@code first = false} and the given {@code afterColumn}. */
+    public static AlterTableChangeColumnEvent changeAfter(
+            TablePath tablePath, String oldColumn, Column column, String afterColumn) {
+        return new AlterTableChangeColumnEvent(tablePath, oldColumn, column, false, afterColumn);
+    }
+}
diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/record/SchemaBarrier.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnEvent.java
similarity index 69%
rename from seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/record/SchemaBarrier.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnEvent.java
index 4268acd86726..a61dccc08d15 100644
--- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/record/SchemaBarrier.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnEvent.java
@@ -15,22
+15,15 @@
  * limitations under the License.
  */

-package org.apache.seatunnel.engine.server.task.record;
+package org.apache.seatunnel.api.table.event;

-/** Change the schema of the task and flow. */
-public class SchemaBarrier implements Barrier {
-    @Override
-    public long getId() {
-        return -1;
-    }
+import org.apache.seatunnel.api.table.catalog.TablePath;

-    @Override
-    public boolean snapshot() {
-        return false;
-    }
+import lombok.ToString;

-    @Override
-    public boolean prepareClose() {
-        return false;
+@ToString(callSuper = true)
+public abstract class AlterTableColumnEvent extends AlterTableEvent { // shared base type; adds no state of its own
+    public AlterTableColumnEvent(TablePath tablePath) {
+        super(tablePath);
     }
 }
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnsEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnsEvent.java
new file mode 100644
index 000000000000..eb81c67dd193
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnsEvent.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event;
+
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.Getter;
+import lombok.ToString;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Schema-change event that groups several column-level events for one table.
+ *
+ * <p>The contained events are kept in insertion order in {@link #getEvents()} (getter generated
+ * by Lombok).
+ */
+@Getter
+@ToString(callSuper = true)
+public class AlterTableColumnsEvent extends AlterTableEvent {
+    // Typed element list: consumers iterate it as AlterTableColumnEvent without casting.
+    private final List<AlterTableColumnEvent> events;
+
+    /** Creates an event for {@code tablePath} with an empty, mutable event list. */
+    public AlterTableColumnsEvent(TablePath tablePath) {
+        this(tablePath, new ArrayList<>());
+    }
+
+    /**
+     * Creates an event for {@code tablePath} backed by the given list.
+     *
+     * <p>The list is stored by reference, not copied, so {@link #addEvent} mutates the list that
+     * was passed in.
+     */
+    public AlterTableColumnsEvent(TablePath tablePath, List<AlterTableColumnEvent> events) {
+        super(tablePath);
+        this.events = events;
+    }
+
+    /**
+     * Appends {@code event} to the list.
+     *
+     * @return this instance, so calls can be chained
+     */
+    public AlterTableColumnsEvent addEvent(AlterTableColumnEvent event) {
+        events.add(event);
+        return this;
+    }
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableDropColumnEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableDropColumnEvent.java
new file mode 100644
index 000000000000..3dbf5294594f
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableDropColumnEvent.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event;
+
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.Getter;
+import lombok.ToString;
+
+/** Schema-change event that names a column to drop from the table at the given path. */
+@Getter
+@ToString(callSuper = true)
+public class AlterTableDropColumnEvent extends AlterTableColumnEvent {
+    // Name of the column to drop (only the name is carried, not a column definition).
+    private final String column;
+
+    /**
+     * @param tablePath table the event applies to
+     * @param column name of the column to drop
+     */
+    public AlterTableDropColumnEvent(TablePath tablePath, String column) {
+        super(tablePath);
+        this.column = column;
+    }
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableEvent.java
new file mode 100644
index 000000000000..0bf268dc210e
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableEvent.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event;
+
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.ToString;
+
+/**
+ * Abstract base type for events that alter an existing table.
+ *
+ * <p>It adds no state of its own; the table path is stored by {@link TableEvent}.
+ */
+@ToString(callSuper = true)
+public abstract class AlterTableEvent extends TableEvent {
+    /** @param tablePath table the event applies to, passed to {@link TableEvent} */
+    public AlterTableEvent(TablePath tablePath) {
+        super(tablePath);
+    }
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableModifyColumnEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableModifyColumnEvent.java
new file mode 100644
index 000000000000..97be83f719eb
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableModifyColumnEvent.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event;
+
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.Getter;
+import lombok.ToString;
+
+/**
+ * Schema-change event that carries a new definition for a column, identified by the name of the
+ * carried {@code column}.
+ *
+ * <p>All state ({@code column}, {@code first}, {@code afterColumn}) is inherited from {@link
+ * AlterTableAddColumnEvent}; this class declares no fields of its own. Because it is a subclass
+ * of the add event, code that dispatches with {@code instanceof} has to test for this type before
+ * testing for {@link AlterTableAddColumnEvent}.
+ */
+@Getter
+@ToString(callSuper = true)
+public class AlterTableModifyColumnEvent extends AlterTableAddColumnEvent {
+    /**
+     * @param tablePath table the event applies to
+     * @param column new column definition
+     * @param first whether the column is requested at the first position
+     * @param afterColumn name of the column to place the modified column after, or {@code null}
+     */
+    public AlterTableModifyColumnEvent(
+            TablePath tablePath, Column column, boolean first, String afterColumn) {
+        super(tablePath, column, first, afterColumn);
+    }
+
+    /** Creates an event with {@code first = true} and no {@code afterColumn}. */
+    public static AlterTableModifyColumnEvent modifyFirst(TablePath tablePath, Column column) {
+        return new AlterTableModifyColumnEvent(tablePath, column, true, null);
+    }
+
+    /** Creates an event with {@code first = false} and no {@code afterColumn}. */
+    public static AlterTableModifyColumnEvent modify(TablePath tablePath, Column column) {
+        return new AlterTableModifyColumnEvent(tablePath, column, false, null);
+    }
+
+    /** Creates an event with {@code first = false} and the given {@code afterColumn}. */
+    public static AlterTableModifyColumnEvent modifyAfter(
+            TablePath tablePath, Column column, String afterColumn) {
+        return new AlterTableModifyColumnEvent(tablePath, column, false, afterColumn);
+    }
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableNameEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableNameEvent.java
new file mode 100644
index 000000000000..cc01a916031b
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableNameEvent.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event;
+
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.Getter;
+import lombok.ToString;
+
+/**
+ * Schema-change event that carries a second table path, {@code newTablePath}, in addition to the
+ * path of the table the event applies to.
+ *
+ * <p>NOTE(review): this class extends {@link AlterTableColumnEvent} although it carries a table
+ * path and no column data; confirm that the column-event base is the intended parent.
+ */
+@Getter
+@ToString(callSuper = true)
+public class AlterTableNameEvent extends AlterTableColumnEvent {
+    // Presumably the path the table has after the rename; verify against producers of this event.
+    private final TablePath newTablePath;
+
+    /**
+     * @param tablePath table the event applies to, passed to the superclass
+     * @param newTablePath second path carried by the event
+     */
+    public AlterTableNameEvent(TablePath tablePath, TablePath newTablePath) {
+        super(tablePath);
+        this.newTablePath = newTablePath;
+    }
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/SchemaChangeEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/SchemaChangeEvent.java
new file mode 100644
index 000000000000..3f01d8f867fd
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/SchemaChangeEvent.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event;
+
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import java.io.Serializable;
+
+/** Represents a structural change to a table schema. */
+public interface SchemaChangeEvent extends Serializable {
+
+    /**
+     * Returns the path of the table that this change applies to.
+     *
+     * @return the path of the changed table
+     */
+    TablePath tablePath();
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/TableEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/TableEvent.java
new file mode 100644
index 000000000000..b81f18f88763
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/TableEvent.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event;
+
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+import lombok.ToString;
+
+/**
+ * Base implementation of {@link SchemaChangeEvent} that stores the table path.
+ *
+ * <p>Lombok generates the single-argument constructor ({@code @RequiredArgsConstructor}) and a
+ * {@code getTablePath()} accessor ({@code @Getter}); {@link #tablePath()} returns the same field
+ * to satisfy the interface.
+ */
+@Getter
+@ToString
+@RequiredArgsConstructor
+public abstract class TableEvent implements SchemaChangeEvent {
+    // Path of the table the event applies to; visible to subclasses.
+    protected final TablePath tablePath;
+
+    /** Returns the table path supplied at construction. */
+    @Override
+    public TablePath tablePath() {
+        return tablePath;
+    }
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/AlterTableEventHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/AlterTableEventHandler.java
new file mode 100644
index 000000000000..b020e66a2a3e
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/AlterTableEventHandler.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event.handler;
+
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.event.AlterTableAddColumnEvent;
+import org.apache.seatunnel.api.table.event.AlterTableChangeColumnEvent;
+import org.apache.seatunnel.api.table.event.AlterTableColumnEvent;
+import org.apache.seatunnel.api.table.event.AlterTableColumnsEvent;
+import org.apache.seatunnel.api.table.event.AlterTableDropColumnEvent;
+import org.apache.seatunnel.api.table.event.AlterTableEvent;
+import org.apache.seatunnel.api.table.event.AlterTableModifyColumnEvent;
+import org.apache.seatunnel.api.table.event.AlterTableNameEvent;
+import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Applies {@link AlterTableEvent}s to a {@link SeaTunnelRowType} and returns the resulting row
+ * type.
+ *
+ * <p>The row type to transform is supplied through {@link #reset(SeaTunnelRowType)}; {@link
+ * #apply(SchemaChangeEvent)} builds a new row type and does not update the stored one.
+ */
+public class AlterTableEventHandler implements DataTypeChangeEventHandler {
+    // Row type that the next apply(...) call starts from; set through reset(...).
+    private SeaTunnelRowType dataType;
+
+    /** Returns the row type most recently passed to {@link #reset(SeaTunnelRowType)}. */
+    @Override
+    public SeaTunnelRowType get() {
+        return dataType;
+    }
+
+    /** Stores {@code dataType} as the starting point for the next event and returns this. */
+    @Override
+    public DataTypeChangeEventHandler reset(SeaTunnelRowType dataType) {
+        this.dataType = dataType;
+        return this;
+    }
+
+    /**
+     * Applies {@code event} to the stored row type.
+     *
+     * @param event the event to apply; it is cast to {@link AlterTableEvent} unconditionally, so
+     *     any other event type fails with a {@link ClassCastException}
+     * @return the row type after the change
+     * @throws UnsupportedOperationException if the event is not one of the handled subtypes
+     */
+    @Override
+    public SeaTunnelRowType apply(SchemaChangeEvent event) {
+        AlterTableEvent alterTableEvent = (AlterTableEvent) event;
+        return apply(dataType, alterTableEvent);
+    }
+
+    /** Dispatches on the concrete event type and returns the transformed row type. */
+    private SeaTunnelRowType apply(SeaTunnelRowType dataType, AlterTableEvent alterTableEvent) {
+        // A table rename carries no column change, so the row type is returned as-is.
+        if (alterTableEvent instanceof AlterTableNameEvent) {
+            return dataType;
+        }
+        if (alterTableEvent instanceof AlterTableDropColumnEvent) {
+            return applyDropColumn(dataType, (AlterTableDropColumnEvent) alterTableEvent);
+        }
+        // Modify and Change are subclasses of AlterTableAddColumnEvent, so they have to be
+        // tested before the Add branch below.
+        if (alterTableEvent instanceof AlterTableModifyColumnEvent) {
+            return applyModifyColumn(dataType, (AlterTableModifyColumnEvent) alterTableEvent);
+        }
+        if (alterTableEvent instanceof AlterTableChangeColumnEvent) {
+            return applyChangeColumn(dataType, (AlterTableChangeColumnEvent) alterTableEvent);
+        }
+        if (alterTableEvent instanceof AlterTableAddColumnEvent) {
+            return applyAddColumn(dataType, (AlterTableAddColumnEvent) alterTableEvent);
+        }
+        if (alterTableEvent instanceof AlterTableColumnsEvent) {
+            // Apply the grouped events in list order, feeding each result into the next.
+            SeaTunnelRowType newType = dataType;
+            for (AlterTableColumnEvent columnEvent :
+                    ((AlterTableColumnsEvent) alterTableEvent).getEvents()) {
+                newType = apply(newType, columnEvent);
+            }
+            return newType;
+        }
+
+        throw new UnsupportedOperationException(
+                "Unsupported alter table event: " + alterTableEvent);
+    }
+
+    /**
+     * Inserts the event's column first, after the named column, or last.
+     *
+     * <p>If a field with the same name already exists, the event is treated as a modify event.
+     */
+    private SeaTunnelRowType applyAddColumn(
+            SeaTunnelRowType dataType, AlterTableAddColumnEvent addColumnEvent) {
+        LinkedList<String> originFields = new LinkedList<>(Arrays.asList(dataType.getFieldNames()));
+        LinkedList<SeaTunnelDataType<?>> originFieldTypes =
+                new LinkedList<>(Arrays.asList(dataType.getFieldTypes()));
+        Column column = addColumnEvent.getColumn();
+        if (originFields.contains(column.getName())) {
+            return applyModifyColumn(
+                    dataType,
+                    new AlterTableModifyColumnEvent(
+                            addColumnEvent.tablePath(),
+                            addColumnEvent.getColumn(),
+                            addColumnEvent.isFirst(),
+                            addColumnEvent.getAfterColumn()));
+        }
+
+        if (addColumnEvent.isFirst()) {
+            originFields.addFirst(column.getName());
+            originFieldTypes.addFirst(column.getDataType());
+        } else if (addColumnEvent.getAfterColumn() != null) {
+            // NOTE(review): indexOf returns -1 when afterColumn is not a field, so the column
+            // is then inserted at position 0; confirm that this fallback is intended.
+            int index = originFields.indexOf(addColumnEvent.getAfterColumn());
+            originFields.add(index + 1, column.getName());
+            originFieldTypes.add(index + 1, column.getDataType());
+        } else {
+            originFields.addLast(column.getName());
+            originFieldTypes.addLast(column.getDataType());
+        }
+
+        return new SeaTunnelRowType(
+                originFields.toArray(new String[0]),
+                originFieldTypes.toArray(new SeaTunnelDataType[0]));
+    }
+
+    /** Returns a row type without the fields whose name equals the event's column name. */
+    private SeaTunnelRowType applyDropColumn(
+            SeaTunnelRowType dataType, AlterTableDropColumnEvent dropColumnEvent) {
+        List<String> fieldNames = new ArrayList<>();
+        List<SeaTunnelDataType<?>> fieldTypes = new ArrayList<>();
+        for (int i = 0; i < dataType.getTotalFields(); i++) {
+            if (dataType.getFieldName(i).equals(dropColumnEvent.getColumn())) {
+                continue;
+            }
+            fieldNames.add(dataType.getFieldName(i));
+            fieldTypes.add(dataType.getFieldType(i));
+        }
+        return new SeaTunnelRowType(
+                fieldNames.toArray(new String[0]), fieldTypes.toArray(new SeaTunnelDataType[0]));
+    }
+
+    /**
+     * Replaces (and optionally moves) the field named like the event's column.
+     *
+     * <p>Returns {@code dataType} unchanged when no field has that name.
+     */
+    private SeaTunnelRowType applyModifyColumn(
+            SeaTunnelRowType dataType, AlterTableModifyColumnEvent modifyColumnEvent) {
+        List<String> fieldNames = Arrays.asList(dataType.getFieldNames());
+        if (!fieldNames.contains(modifyColumnEvent.getColumn().getName())) {
+            return dataType;
+        }
+
+        String modifyColumnName = modifyColumnEvent.getColumn().getName();
+        int modifyColumnIndex = dataType.indexOf(modifyColumnName);
+        return applyModifyColumn(
+                dataType,
+                modifyColumnIndex,
+                modifyColumnEvent.getColumn(),
+                modifyColumnEvent.isFirst(),
+                modifyColumnEvent.getAfterColumn());
+    }
+
+    /** Replaces (and optionally moves) the field named by the event's old column name. */
+    private SeaTunnelRowType applyChangeColumn(
+            SeaTunnelRowType dataType, AlterTableChangeColumnEvent changeColumnEvent) {
+        String oldColumn = changeColumnEvent.getOldColumn();
+        // NOTE(review): unlike the modify path there is no presence check here; what happens
+        // for an unknown column depends on SeaTunnelRowType.indexOf - confirm.
+        int oldColumnIndex = dataType.indexOf(oldColumn);
+
+        return applyModifyColumn(
+                dataType,
+                oldColumnIndex,
+                changeColumnEvent.getColumn(),
+                changeColumnEvent.isFirst(),
+                changeColumnEvent.getAfterColumn());
+    }
+
+    /**
+     * Rewrites the field at {@code columnIndex} with {@code column}'s name and data type.
+     *
+     * <p>With {@code first} the field is moved to the front; with a non-null {@code afterColumn}
+     * it is moved behind that field; otherwise it is replaced in place.
+     */
+    private SeaTunnelRowType applyModifyColumn(
+            SeaTunnelRowType dataType,
+            int columnIndex,
+            Column column,
+            boolean first,
+            String afterColumn) {
+        LinkedList<String> originFields = new LinkedList<>(Arrays.asList(dataType.getFieldNames()));
+        LinkedList<SeaTunnelDataType<?>> originFieldTypes =
+                new LinkedList<>(Arrays.asList(dataType.getFieldTypes()));
+
+        if (first) {
+            originFields.remove(columnIndex);
+            originFieldTypes.remove(columnIndex);
+
+            originFields.addFirst(column.getName());
+            originFieldTypes.addFirst(column.getDataType());
+        } else if (afterColumn != null) {
+            originFields.remove(columnIndex);
+            originFieldTypes.remove(columnIndex);
+
+            // The lookup runs after the removal, so the index refers to the shortened list.
+            // NOTE(review): -1 (afterColumn not found) puts the column at position 0.
+            int index = originFields.indexOf(afterColumn);
+            originFields.add(index + 1, column.getName());
+            originFieldTypes.add(index + 1, column.getDataType());
+        } else {
+            originFields.set(columnIndex, column.getName());
+            originFieldTypes.set(columnIndex, column.getDataType());
+        }
+        return new SeaTunnelRowType(
+                originFields.toArray(new String[0]),
+                originFieldTypes.toArray(new SeaTunnelDataType[0]));
+    }
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventDispatcher.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventDispatcher.java
new file mode 100644
index 000000000000..ec4f69334f7a
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventDispatcher.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event.handler;
+
+import org.apache.seatunnel.api.table.event.AlterTableAddColumnEvent;
+import org.apache.seatunnel.api.table.event.AlterTableChangeColumnEvent;
+import org.apache.seatunnel.api.table.event.AlterTableColumnsEvent;
+import org.apache.seatunnel.api.table.event.AlterTableDropColumnEvent;
+import org.apache.seatunnel.api.table.event.AlterTableEvent;
+import org.apache.seatunnel.api.table.event.AlterTableModifyColumnEvent;
+import org.apache.seatunnel.api.table.event.AlterTableNameEvent;
+import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Routes a {@link SchemaChangeEvent} to the {@link DataTypeChangeEventHandler} registered for the
+ * event's exact runtime class.
+ *
+ * <p>The lookup uses {@code event.getClass()}, so an event whose class is not registered itself
+ * is not handled even if a superclass is registered.
+ */
+@Slf4j
+public class DataTypeChangeEventDispatcher implements DataTypeChangeEventHandler {
+
+    // Handler lookup keyed by the concrete event class.
+    private final Map<Class<? extends SchemaChangeEvent>, DataTypeChangeEventHandler> handlers;
+    // Row type that the next apply(...) call starts from; set through reset(...).
+    private SeaTunnelRowType dataType;
+
+    public DataTypeChangeEventDispatcher() {
+        this.handlers = createHandlers();
+    }
+
+    /** Returns the row type most recently passed to {@link #reset(SeaTunnelRowType)}. */
+    @Override
+    public SeaTunnelRowType get() {
+        return dataType;
+    }
+
+    /** Stores {@code dataType} as the starting point for the next event and returns this. */
+    @Override
+    public DataTypeChangeEventHandler reset(SeaTunnelRowType dataType) {
+        this.dataType = dataType;
+        return this;
+    }
+
+    /**
+     * Applies {@code event} through the handler registered for its class.
+     *
+     * @return the handler's result, or the stored row type unchanged (after logging a warning)
+     *     when no handler is registered for the event's class
+     */
+    @Override
+    public SeaTunnelRowType apply(SchemaChangeEvent event) {
+        DataTypeChangeEventHandler handler = handlers.get(event.getClass());
+        if (handler == null) {
+            log.warn("No DataTypeChangeEventHandler for event: {}", event.getClass());
+            return dataType;
+        }
+        // The delegate is stateful, so it is handed the current row type before applying.
+        return handler.reset(dataType).apply(event);
+    }
+
+    /** Builds the class-to-handler table; one shared handler serves every alter-table event. */
+    private static Map<Class<? extends SchemaChangeEvent>, DataTypeChangeEventHandler>
+            createHandlers() {
+        Map<Class<? extends SchemaChangeEvent>, DataTypeChangeEventHandler> handlers =
+                new HashMap<>();
+
+        AlterTableEventHandler alterTableEventHandler = new AlterTableEventHandler();
+        handlers.put(AlterTableEvent.class, alterTableEventHandler);
+        handlers.put(AlterTableNameEvent.class, alterTableEventHandler);
+        handlers.put(AlterTableColumnsEvent.class, alterTableEventHandler);
+        handlers.put(AlterTableAddColumnEvent.class, alterTableEventHandler);
+        handlers.put(AlterTableModifyColumnEvent.class, alterTableEventHandler);
+        handlers.put(AlterTableDropColumnEvent.class, alterTableEventHandler);
+        handlers.put(AlterTableChangeColumnEvent.class, alterTableEventHandler);
+        return handlers;
+    }
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventHandler.java
new file mode 100644
index 000000000000..01d8924d531b
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventHandler.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event.handler;
+
+import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+
+/**
+ * A {@link SchemaChangeEventHandler} that turns a schema-change event into an updated {@link
+ * SeaTunnelRowType}.
+ *
+ * <p>Usage is a two-step protocol: {@link #reset(SeaTunnelRowType)} supplies the row type to
+ * start from, then {@link #handle(SchemaChangeEvent)} applies one event and clears the stored
+ * row type again.
+ */
+public interface DataTypeChangeEventHandler extends SchemaChangeEventHandler<SeaTunnelRowType> {
+
+    /** Returns the row type currently stored in the handler, or {@code null} if none is set. */
+    SeaTunnelRowType get();
+
+    /**
+     * Stores the row type the next event is applied to.
+     *
+     * @return the handler to call next; the implementations in this package return {@code this}
+     */
+    DataTypeChangeEventHandler reset(SeaTunnelRowType dataType);
+
+    /**
+     * Applies {@code event} to the stored row type and then clears the stored row type.
+     *
+     * @return the row type produced by {@link #apply(SchemaChangeEvent)}
+     * @throws IllegalStateException if no row type was supplied through {@code reset} before the
+     *     call, or if the row type is still set after the clearing {@code reset(null)}
+     */
+    default SeaTunnelRowType handle(SchemaChangeEvent event) {
+        if (get() == null) {
+            throw new IllegalStateException("DataTypeChanger not reset");
+        }
+
+        try {
+            return apply(event);
+        } finally {
+            // Always clear the stored row type so a stale one cannot be reused by a later call.
+            reset(null);
+            if (get() != null) {
+                throw new IllegalStateException("DataTypeChanger not reset");
+            }
+        }
+    }
+
+    /** Computes the row type that results from applying {@code event} to the stored row type. */
+    SeaTunnelRowType apply(SchemaChangeEvent event);
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/SchemaChangeEventHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/SchemaChangeEventHandler.java
new file mode 100644
index 000000000000..167dc6cc315e
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/SchemaChangeEventHandler.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.event.handler;
+
+import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+
+import java.io.Serializable;
+
+/**
+ * Serializable callback that processes a {@link SchemaChangeEvent}.
+ *
+ * @param <T> type of the value produced by handling an event
+ */
+public interface SchemaChangeEventHandler<T> extends Serializable {
+
+    /**
+     * Handles one schema-change event.
+     *
+     * @param event the event to handle
+     * @return the result of handling the event
+     */
+    T handle(SchemaChangeEvent event);
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java
index 6bf0a2a865db..4eedb2255ad6 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowType.java
@@ -20,7 +20,7 @@
 import java.util.Arrays;
 import java.util.List;

-import static com.google.common.base.Preconditions.checkArgument;
+import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument;

 public class SeaTunnelRowType implements CompositeType {
     private static final long serialVersionUID = 2L;
diff --git a/seatunnel-api/src/test/java/org/apache/seatunnel/api/configuration/ReadableConfigTest.java b/seatunnel-api/src/test/java/org/apache/seatunnel/api/configuration/ReadableConfigTest.java
index b1436edd0fc5..ffaae72d0f10 100644
--- a/seatunnel-api/src/test/java/org/apache/seatunnel/api/configuration/ReadableConfigTest.java
+++ b/seatunnel-api/src/test/java/org/apache/seatunnel/api/configuration/ReadableConfigTest.java
@@ -35,7 +35,6 @@
 import java.util.List;
 import java.util.Map;

-@SuppressWarnings("checkstyle:StaticVariableName")
 public class ReadableConfigTest {
     private static final String CONFIG_PATH = "/conf/option-test.conf";
     private static ReadonlyConfig config;
diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/RetryUtils.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/RetryUtils.java
index aa1bbd5934bd..e8ee03a5013a 100644
---
a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/RetryUtils.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/RetryUtils.java @@ -66,7 +66,7 @@ public static T retryWithException( backoff); Thread.sleep(backoff); } else { - log.debug(attemptMessage, ExceptionUtils.getMessage(e), i, retryTimes, 0); + log.info(attemptMessage, ExceptionUtils.getMessage(e), i, retryTimes, 0); } } } diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationUtils.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationUtils.java index d0e9a5b941b7..46494bc5c525 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationUtils.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/SerializationUtils.java @@ -44,7 +44,6 @@ public static T stringToObject(String str) { return null; } - @SuppressWarnings("checkstyle:MagicNumber") public static byte[] serialize(T obj) { try (ByteArrayOutputStream b = new ByteArrayOutputStream(512); ObjectOutputStream out = new ObjectOutputStream(b)) { diff --git a/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/SerializationUtilsTest.java b/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/SerializationUtilsTest.java index 8121a6b42c9f..b5085d85b2c8 100644 --- a/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/SerializationUtilsTest.java +++ b/seatunnel-common/src/test/java/org/apache/seatunnel/common/utils/SerializationUtilsTest.java @@ -23,7 +23,6 @@ import java.util.ArrayList; import java.util.HashMap; -@SuppressWarnings("checkstyle:RegexpSingleline") public class SerializationUtilsTest { @Test diff --git a/seatunnel-config/seatunnel-config-shade/src/main/java/org/apache/seatunnel/shade/com/typesafe/config/impl/PropertiesParser.java 
b/seatunnel-config/seatunnel-config-shade/src/main/java/org/apache/seatunnel/shade/com/typesafe/config/impl/PropertiesParser.java index eceacf997972..3cfdb7dba3b5 100644 --- a/seatunnel-config/seatunnel-config-shade/src/main/java/org/apache/seatunnel/shade/com/typesafe/config/impl/PropertiesParser.java +++ b/seatunnel-config/seatunnel-config-shade/src/main/java/org/apache/seatunnel/shade/com/typesafe/config/impl/PropertiesParser.java @@ -7,8 +7,7 @@ import java.io.IOException; import java.io.Reader; import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; +import java.util.Arrays; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; @@ -58,7 +57,15 @@ private static AbstractConfigObject fromEntrySet( } private static Map getPathMap(Set> entries) { - Map pathMap = new LinkedHashMap(); + Map pathMap = new LinkedHashMap<>(); + System.getProperties() + .forEach( + (key, value) -> { + if (key instanceof String) { + Path path = pathFromPropertyKey((String) key); + pathMap.put(path, value); + } + }); for (Map.Entry entry : entries) { Object key = entry.getKey(); if (key instanceof String) { @@ -74,7 +81,7 @@ static AbstractConfigObject fromStringMap(ConfigOrigin origin, Map pathExpressionMap) { - Map pathMap = new LinkedHashMap(); + Map pathMap = new LinkedHashMap<>(); for (Map.Entry entry : pathExpressionMap.entrySet()) { Object keyObj = entry.getKey(); if (!(keyObj instanceof String)) { @@ -93,8 +100,8 @@ private static AbstractConfigObject fromPathMap( * First, build a list of paths that will have values, either string or * object values. */ - Set scopePaths = new LinkedHashSet(); - Set valuePaths = new LinkedHashSet(); + Set scopePaths = new LinkedHashSet<>(); + Set valuePaths = new LinkedHashSet<>(); for (Path path : pathMap.keySet()) { // add value's path valuePaths.add(path); @@ -129,13 +136,11 @@ private static AbstractConfigObject fromPathMap( /* * Create maps for the object-valued values. 
*/ - Map root = new LinkedHashMap(); - Map> scopes = - new LinkedHashMap>(); + Map root = new LinkedHashMap<>(); + Map> scopes = new LinkedHashMap<>(); for (Path path : scopePaths) { - Map scope = - new LinkedHashMap(); + Map scope = new LinkedHashMap<>(); scopes.put(path, scope); } @@ -150,7 +155,17 @@ private static AbstractConfigObject fromPathMap( AbstractConfigValue value; if (convertedFromProperties) { if (rawValue instanceof String) { - value = new ConfigString.Quoted(origin, (String) rawValue); + if (((String) rawValue).startsWith("[") && ((String) rawValue).endsWith("]")) { + List list = + Arrays.asList( + ((String) rawValue) + .substring(1, ((String) rawValue).length() - 1) + .split(",")); + value = ConfigImpl.fromAnyRef(list, origin, FromMapMode.KEYS_ARE_PATHS); + } else { + value = new ConfigString.Quoted(origin, (String) rawValue); + } + } else { // silently ignore non-string values in Properties value = null; @@ -167,19 +182,14 @@ private static AbstractConfigObject fromPathMap( * Make a list of scope paths from longest to shortest, so children go * before parents. */ - List sortedScopePaths = new ArrayList(); - sortedScopePaths.addAll(scopePaths); + List sortedScopePaths = new ArrayList<>(scopePaths); // sort descending by length - Collections.sort( - sortedScopePaths, - new Comparator() { - @Override - public int compare(Path a, Path b) { - // Path.length() is O(n) so in theory this sucks - // but in practice we can make Path precompute length - // if it ever matters. - return b.length() - a.length(); - } + sortedScopePaths.sort( + (a, b) -> { + // Path.length() is O(n) so in theory this sucks + // but in practice we can make Path precompute length + // if it ever matters. 
+ return b.length() - a.length(); }); /* diff --git a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBConfig.java b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBConfig.java index ccb6808ffa0f..5194e50f7c6b 100644 --- a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBConfig.java +++ b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBConfig.java @@ -49,14 +49,12 @@ public class AmazonDynamoDBConfig implements Serializable { .noDefaultValue() .withDescription("The table of Amazon DynamoDB"); - @SuppressWarnings("checkstyle:MagicNumber") public static final Option BATCH_SIZE = Options.key("batch_size") .intType() .defaultValue(25) .withDescription("The batch size of Amazon DynamoDB"); - @SuppressWarnings("checkstyle:MagicNumber") public static final Option BATCH_INTERVAL_MS = Options.key("batch_interval_ms") .intType() diff --git a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBSourceOptions.java b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBSourceOptions.java index f92921ee140f..54f955f540e0 100644 --- a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBSourceOptions.java +++ b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBSourceOptions.java @@ -43,7 +43,6 @@ public class AmazonDynamoDBSourceOptions implements Serializable { 
private Config schema; public int batchSize = AmazonDynamoDBConfig.BATCH_SIZE.defaultValue(); - public int batchIntervalMs = AmazonDynamoDBConfig.BATCH_INTERVAL_MS.defaultValue(); public AmazonDynamoDBSourceOptions(Config config) { this.url = config.getString(AmazonDynamoDBConfig.URL.key()); @@ -57,8 +56,5 @@ public AmazonDynamoDBSourceOptions(Config config) { if (config.hasPath(AmazonDynamoDBConfig.BATCH_SIZE.key())) { this.batchSize = config.getInt(AmazonDynamoDBConfig.BATCH_SIZE.key()); } - if (config.hasPath(AmazonDynamoDBConfig.BATCH_INTERVAL_MS.key())) { - this.batchIntervalMs = config.getInt(AmazonDynamoDBConfig.BATCH_INTERVAL_MS.key()); - } } } diff --git a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/AmazonDynamoDBWriter.java b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/AmazonDynamoDBWriter.java index 016036cc841b..d059bce7b578 100644 --- a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/AmazonDynamoDBWriter.java +++ b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/AmazonDynamoDBWriter.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import java.io.IOException; +import java.util.Optional; public class AmazonDynamoDBWriter extends AbstractSinkWriter { @@ -48,4 +49,10 @@ public void write(SeaTunnelRow element) throws IOException { public void close() throws IOException { dynamoDbSinkClient.close(); } + + @Override + public Optional prepareCommit() { + dynamoDbSinkClient.flush(); + return Optional.empty(); + } } diff --git a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/DynamoDbSinkClient.java 
b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/DynamoDbSinkClient.java index d8acf33ebeb8..e42f573dfb8a 100644 --- a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/DynamoDbSinkClient.java +++ b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/DynamoDbSinkClient.java @@ -24,7 +24,6 @@ import org.apache.seatunnel.connectors.seatunnel.amazondynamodb.serialize.DefaultSeaTunnelRowDeserializer; import org.apache.seatunnel.connectors.seatunnel.amazondynamodb.serialize.SeaTunnelRowDeserializer; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; import software.amazon.awssdk.regions.Region; @@ -40,15 +39,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; public class DynamoDbSinkClient { private final AmazonDynamoDBSourceOptions amazondynamodbSourceOptions; - private ScheduledExecutorService scheduler; - private ScheduledFuture scheduledFuture; private volatile boolean initialize; private volatile Exception flushException; private DynamoDbClient dynamoDbClient; @@ -62,7 +55,7 @@ public DynamoDbSinkClient( this.seaTunnelRowDeserializer = new DefaultSeaTunnelRowDeserializer(typeInfo); } - private void tryInit() throws IOException { + private void tryInit() { if (initialize) { return; } @@ -78,25 +71,6 @@ private void tryInit() throws IOException { amazondynamodbSourceOptions.getAccessKeyId(), amazondynamodbSourceOptions.getSecretAccessKey()))) .build(); - - scheduler = - 
Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("DdynamoDb-sink-output-%s") - .build()); - scheduledFuture = - scheduler.scheduleAtFixedRate( - () -> { - try { - flush(); - } catch (IOException e) { - flushException = e; - } - }, - amazondynamodbSourceOptions.getBatchIntervalMs(), - amazondynamodbSourceOptions.getBatchIntervalMs(), - TimeUnit.MILLISECONDS); - initialize = true; } @@ -114,17 +88,13 @@ public synchronized void write(PutItemRequest putItemRequest) throws IOException } public synchronized void close() throws IOException { - if (scheduledFuture != null) { - scheduledFuture.cancel(false); - scheduler.shutdown(); - } if (dynamoDbClient != null) { flush(); dynamoDbClient.close(); } } - synchronized void flush() throws IOException { + synchronized void flush() { checkFlushException(); if (batchList.isEmpty()) { return; diff --git a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/source/AmazonDynamoDBSourceReader.java b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/source/AmazonDynamoDBSourceReader.java index afaafa3f8a9f..c25f8b0e0b15 100644 --- a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/source/AmazonDynamoDBSourceReader.java +++ b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/source/AmazonDynamoDBSourceReader.java @@ -31,11 +31,13 @@ import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.dynamodb.DynamoDbClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; import software.amazon.awssdk.services.dynamodb.model.ScanRequest; import software.amazon.awssdk.services.dynamodb.model.ScanResponse; 
import java.io.IOException; import java.net.URI; +import java.util.Map; @Slf4j public class AmazonDynamoDBSourceReader extends AbstractSingleSplitReader { @@ -78,18 +80,25 @@ public void close() throws IOException { @Override @SuppressWarnings("magicnumber") public void pollNext(Collector output) throws Exception { - ScanResponse scan = - dynamoDbClient.scan( - ScanRequest.builder() - .tableName(amazondynamodbSourceOptions.getTable()) - .build()); - if (scan.hasItems()) { - scan.items() - .forEach( - item -> { - output.collect(seaTunnelRowDeserializer.deserialize(item)); - }); - } + Map lastKeyEvaluated = null; + + ScanResponse scan; + do { + scan = + dynamoDbClient.scan( + ScanRequest.builder() + .tableName(amazondynamodbSourceOptions.getTable()) + .exclusiveStartKey(lastKeyEvaluated) + .build()); + if (scan.hasItems()) { + scan.items() + .forEach( + item -> { + output.collect(seaTunnelRowDeserializer.deserialize(item)); + }); + } + lastKeyEvaluated = scan.lastEvaluatedKey(); + } while (lastKeyEvaluated != null && !lastKeyEvaluated.isEmpty()); context.signalNoMoreElement(); } } diff --git a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/excecutor/AssertExecutor.java b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/excecutor/AssertExecutor.java index c8666cd9a55e..5868fba91276 100644 --- a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/excecutor/AssertExecutor.java +++ b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/excecutor/AssertExecutor.java @@ -20,6 +20,8 @@ import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import 
org.apache.seatunnel.connectors.seatunnel.assertion.exception.AssertConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.assertion.exception.AssertConnectorException; import org.apache.seatunnel.connectors.seatunnel.assertion.rule.AssertFieldRule; import org.apache.commons.lang3.StringUtils; @@ -27,6 +29,7 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import java.math.BigDecimal; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; @@ -108,33 +111,8 @@ private boolean pass(Object value, AssertFieldRule.AssertRule valueRule) { return ((Number) value).doubleValue() >= valueRule.getRuleValue(); } if (valueRule.getEqualTo() != null) { - if (value instanceof String) { - return value.equals(valueRule.getEqualTo()); - } - if (value instanceof Number) { - return ((Number) value).doubleValue() == Double.parseDouble(valueRule.getEqualTo()); - } - if (value instanceof Boolean) { - return value.equals(Boolean.parseBoolean(valueRule.getEqualTo())); - } - if (value instanceof LocalDateTime) { - TemporalAccessor parsedTimestamp = - DateTimeFormatter.ISO_LOCAL_DATE_TIME.parse(valueRule.getEqualTo()); - LocalTime localTime = parsedTimestamp.query(TemporalQueries.localTime()); - LocalDate localDate = parsedTimestamp.query(TemporalQueries.localDate()); - return ((LocalDateTime) value).isEqual(LocalDateTime.of(localDate, localTime)); - } - if (value instanceof LocalDate) { - DateTimeFormatter fmt = DateTimeFormatter.ofPattern("yyyy-MM-dd"); - return ((LocalDate) value).isEqual(LocalDate.parse(valueRule.getEqualTo(), fmt)); - } - if (value instanceof LocalTime) { - DateTimeFormatter fmt = DateTimeFormatter.ofPattern("HH:mm:ss"); - return value.equals(LocalTime.parse(valueRule.getEqualTo(), fmt)); - } - return false; + return compareValue(value, valueRule); } - String valueStr = Objects.isNull(value) ? 
StringUtils.EMPTY : String.valueOf(value); if (AssertFieldRule.AssertRuleType.MAX_LENGTH.equals(valueRule.getRuleType())) { return valueStr.length() <= valueRule.getRuleValue(); @@ -146,6 +124,44 @@ private boolean pass(Object value, AssertFieldRule.AssertRule valueRule) { return Boolean.TRUE; } + private boolean compareValue(Object value, AssertFieldRule.AssertRule valueRule) { + if (value instanceof String) { + return value.equals(valueRule.getEqualTo()); + } else if (value instanceof Integer) { + return value.equals(Integer.parseInt(valueRule.getEqualTo())); + } else if (value instanceof Long) { + return value.equals(Long.parseLong(valueRule.getEqualTo())); + } else if (value instanceof Short) { + return value.equals(Short.parseShort(valueRule.getEqualTo())); + } else if (value instanceof Float) { + return value.equals((Float.parseFloat(valueRule.getEqualTo()))); + } else if (value instanceof Byte) { + return value.equals((Byte.parseByte(valueRule.getEqualTo()))); + } else if (value instanceof Double) { + return value.equals(Double.parseDouble(valueRule.getEqualTo())); + } else if (value instanceof BigDecimal) { + return value.equals(new BigDecimal(valueRule.getEqualTo())); + } else if (value instanceof Boolean) { + return value.equals(Boolean.parseBoolean(valueRule.getEqualTo())); + } else if (value instanceof LocalDateTime) { + TemporalAccessor parsedTimestamp = + DateTimeFormatter.ISO_LOCAL_DATE_TIME.parse(valueRule.getEqualTo()); + LocalTime localTime = parsedTimestamp.query(TemporalQueries.localTime()); + LocalDate localDate = parsedTimestamp.query(TemporalQueries.localDate()); + return ((LocalDateTime) value).isEqual(LocalDateTime.of(localDate, localTime)); + } else if (value instanceof LocalDate) { + DateTimeFormatter fmt = DateTimeFormatter.ofPattern("yyyy-MM-dd"); + return ((LocalDate) value).isEqual(LocalDate.parse(valueRule.getEqualTo(), fmt)); + } else if (value instanceof LocalTime) { + DateTimeFormatter fmt = 
DateTimeFormatter.ofPattern("HH:mm:ss"); + return value.equals(LocalTime.parse(valueRule.getEqualTo(), fmt)); + } else { + throw new AssertConnectorException( + AssertConnectorErrorCode.TYPES_NOT_SUPPORTED_FAILED, + String.format(" %s types not supported yet", value.getClass().getSimpleName())); + } + } + private Boolean checkType(Object value, SeaTunnelDataType fieldType) { return value.getClass().equals(fieldType.getTypeClass()); } diff --git a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/exception/AssertConnectorErrorCode.java b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/exception/AssertConnectorErrorCode.java index abb085e2837e..16ae8aed1c7d 100644 --- a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/exception/AssertConnectorErrorCode.java +++ b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/exception/AssertConnectorErrorCode.java @@ -20,7 +20,8 @@ import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; public enum AssertConnectorErrorCode implements SeaTunnelErrorCode { - RULE_VALIDATION_FAILED("ASSERT-01", "Rule validate failed"); + RULE_VALIDATION_FAILED("ASSERT-01", "Rule validate failed"), + TYPES_NOT_SUPPORTED_FAILED("ASSERT-02", "Types not supported"); private final String code; private final String description; diff --git a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/rule/AssertRuleParser.java b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/rule/AssertRuleParser.java index f479dfa5c99b..eccf2c684505 100644 --- a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/rule/AssertRuleParser.java +++ 
b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/rule/AssertRuleParser.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.shade.com.typesafe.config.Config; import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.DecimalType; import org.apache.seatunnel.api.table.type.LocalTimeType; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; @@ -105,5 +106,6 @@ private SeaTunnelDataType getFieldType(String fieldTypeStr) { TYPES.put("datetime", LocalTimeType.LOCAL_DATE_TIME_TYPE); TYPES.put("date", LocalTimeType.LOCAL_DATE_TYPE); TYPES.put("time", LocalTimeType.LOCAL_TIME_TYPE); + TYPES.put("decimal", new DecimalType(38, 18)); } } diff --git a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSinkWriter.java b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSinkWriter.java index d397681189cf..ee865ad9da66 100644 --- a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSinkWriter.java +++ b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSinkWriter.java @@ -47,7 +47,6 @@ public AssertSinkWriter( } @Override - @SuppressWarnings("checkstyle:RegexpSingleline") public void write(SeaTunnelRow element) { LONG_ACCUMULATOR.accumulate(1); if (Objects.nonNull(assertFieldRules)) { diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/config/JdbcSourceConfigFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/config/JdbcSourceConfigFactory.java index 068ee4be116d..d5d920c2573f 100644 --- 
a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/config/JdbcSourceConfigFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/config/JdbcSourceConfigFactory.java @@ -29,7 +29,6 @@ import java.util.Properties; /** A {@link SourceConfig.Factory} to provide {@link SourceConfig} of JDBC data source. */ -@SuppressWarnings("checkstyle:MagicNumber") public abstract class JdbcSourceConfigFactory implements SourceConfig.Factory { private static final long serialVersionUID = 1L; diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/option/JdbcSourceOptions.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/option/JdbcSourceOptions.java index 715915c745c9..813d69b862a1 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/option/JdbcSourceOptions.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/option/JdbcSourceOptions.java @@ -25,7 +25,6 @@ import java.util.List; /** Configurations for {@link IncrementalSource} of JDBC data source. 
*/ -@SuppressWarnings("checkstyle:MagicNumber") public class JdbcSourceOptions extends SourceOptions { public static final Option HOSTNAME = diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/relational/connection/JdbcConnectionFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/relational/connection/JdbcConnectionFactory.java index 5fad3f6f22c8..349adf37fdc1 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/relational/connection/JdbcConnectionFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/relational/connection/JdbcConnectionFactory.java @@ -44,7 +44,6 @@ public JdbcConnectionFactory( this.jdbcConnectionPoolFactory = jdbcConnectionPoolFactory; } - @SuppressWarnings("checkstyle:MagicNumber") @Override public Connection connect(JdbcConfiguration config) throws SQLException { final int connectRetryTimes = sourceConfig.getConnectMaxRetries(); diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/schema/SchemaChangeResolver.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/schema/SchemaChangeResolver.java new file mode 100644 index 000000000000..ee3ef08f7d22 --- /dev/null +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/schema/SchemaChangeResolver.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.cdc.base.schema; + +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; + +import org.apache.kafka.connect.source.SourceRecord; + +import java.io.Serializable; + +public interface SchemaChangeResolver extends Serializable { + + boolean support(SourceRecord record); + + SchemaChangeEvent resolve(SourceRecord record, SeaTunnelDataType dataType); +} diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/IncrementalSource.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/IncrementalSource.java index 965d3fd27fe8..ed04fb0f5d7d 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/IncrementalSource.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/IncrementalSource.java @@ -27,6 +27,7 @@ import org.apache.seatunnel.api.source.SeaTunnelSource; import org.apache.seatunnel.api.source.SourceReader; import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.api.source.SupportCoordinate; import 
org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.connectors.cdc.base.config.SourceConfig; @@ -36,6 +37,7 @@ import org.apache.seatunnel.connectors.cdc.base.option.SourceOptions; import org.apache.seatunnel.connectors.cdc.base.option.StartupMode; import org.apache.seatunnel.connectors.cdc.base.option.StopMode; +import org.apache.seatunnel.connectors.cdc.base.schema.SchemaChangeResolver; import org.apache.seatunnel.connectors.cdc.base.source.enumerator.HybridSplitAssigner; import org.apache.seatunnel.connectors.cdc.base.source.enumerator.IncrementalSourceEnumerator; import org.apache.seatunnel.connectors.cdc.base.source.enumerator.IncrementalSplitAssigner; @@ -75,7 +77,7 @@ @NoArgsConstructor public abstract class IncrementalSource - implements SeaTunnelSource { + implements SeaTunnelSource, SupportCoordinate { protected ReadonlyConfig readonlyConfig; protected SourceConfig.Factory configFactory; @@ -167,17 +169,22 @@ public SourceReader createReader(SourceReader.Context reader BlockingQueue> elementsQueue = new LinkedBlockingQueue<>(2); + SchemaChangeResolver schemaChangeResolver = deserializationSchema.getSchemaChangeResolver(); Supplier> splitReaderSupplier = () -> new IncrementalSourceSplitReader<>( - readerContext.getIndexOfSubtask(), dataSourceDialect, sourceConfig); + readerContext.getIndexOfSubtask(), + dataSourceDialect, + sourceConfig, + schemaChangeResolver); return new IncrementalSourceReader<>( elementsQueue, splitReaderSupplier, createRecordEmitter(sourceConfig, readerContext.getMetricsContext()), new SourceReaderOptions(readonlyConfig), readerContext, - sourceConfig); + sourceConfig, + deserializationSchema); } protected RecordEmitter createRecordEmitter( diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/IncrementalSplitAssigner.java 
b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/IncrementalSplitAssigner.java index d000d505363b..fe8204f6cd2f 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/IncrementalSplitAssigner.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/IncrementalSplitAssigner.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.connectors.cdc.base.source.enumerator; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.connectors.cdc.base.config.SourceConfig; import org.apache.seatunnel.connectors.cdc.base.source.enumerator.state.IncrementalPhaseState; import org.apache.seatunnel.connectors.cdc.base.source.event.SnapshotSplitWatermark; @@ -70,6 +71,7 @@ public class IncrementalSplitAssigner implements SplitAs private final Map assignedSplits = new HashMap<>(); private boolean startWithSnapshotMinimumOffset = true; + private SeaTunnelDataType checkpointDataType; public IncrementalSplitAssigner( SplitAssigner.Context context, @@ -152,6 +154,7 @@ public void addSplits(Collection splits) { } tableWatermarks.put(tableId, startupOffset); } + checkpointDataType = incrementalSplit.getCheckpointDataType(); }); if (!tableWatermarks.isEmpty()) { this.startWithSnapshotMinimumOffset = false; @@ -249,6 +252,7 @@ private IncrementalSplit createIncrementalSplit( capturedTables, incrementalSplitStartOffset, sourceConfig.getStopConfig().getStopOffset(offsetFactory), - completedSnapshotSplitInfos); + completedSnapshotSplitInfos, + checkpointDataType); } } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/splitter/AbstractJdbcSourceChunkSplitter.java 
b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/splitter/AbstractJdbcSourceChunkSplitter.java index e956b111709b..e99e7dab4b19 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/splitter/AbstractJdbcSourceChunkSplitter.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/splitter/AbstractJdbcSourceChunkSplitter.java @@ -112,6 +112,19 @@ private List splitTableIntoChunks( final int chunkSize = sourceConfig.getSplitSize(); final double distributionFactorUpper = sourceConfig.getDistributionFactorUpper(); final double distributionFactorLower = sourceConfig.getDistributionFactorLower(); + final int sampleShardingThreshold = sourceConfig.getSampleShardingThreshold(); + + log.info( + "Splitting table {} into chunks, split column: {}, min: {}, max: {}, chunk size: {}, " + + "distribution factor upper: {}, distribution factor lower: {}, sample sharding threshold: {}", + tableId, + splitColumnName, + min, + max, + chunkSize, + distributionFactorUpper, + distributionFactorLower, + sampleShardingThreshold); if (isEvenlySplitColumn(splitColumn)) { long approximateRowCnt = queryApproximateRowCnt(jdbc, tableId); @@ -130,7 +143,7 @@ private List splitTableIntoChunks( } else { int shardCount = (int) (approximateRowCnt / chunkSize); int inverseSamplingRate = sourceConfig.getInverseSamplingRate(); - if (sourceConfig.getSampleShardingThreshold() < shardCount) { + if (sampleShardingThreshold < shardCount) { // It is necessary to ensure that the number of data rows sampled by the // sampling rate is greater than the number of shards. 
// Otherwise, if the sampling rate is too low, it may result in an insufficient @@ -144,9 +157,17 @@ private List splitTableIntoChunks( chunkSize); inverseSamplingRate = chunkSize; } + log.info( + "Use sampling sharding for table {}, the sampling rate is {}", + tableId, + inverseSamplingRate); Object[] sample = sampleDataFromColumn( jdbc, tableId, splitColumnName, inverseSamplingRate); + log.info( + "Sample data from table {} end, the sample size is {}", + tableId, + sample.length); return efficientShardingThroughSampling( tableId, sample, approximateRowCnt, shardCount); } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/splitter/ChunkRange.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/splitter/ChunkRange.java index 1cf62f3448bf..c543bad18cdf 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/splitter/ChunkRange.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/enumerator/splitter/ChunkRange.java @@ -22,7 +22,7 @@ import java.util.Objects; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; /** * An internal structure describes a chunk range with a chunk start (inclusive) and chunk end diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceReader.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceReader.java index b251759ff7c2..ceb6215f41d4 100644 --- 
a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceReader.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceReader.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.source.Collector; import org.apache.seatunnel.api.source.SourceReader; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.connectors.cdc.base.config.SourceConfig; import org.apache.seatunnel.connectors.cdc.base.source.event.CompletedSnapshotSplitsReportEvent; import org.apache.seatunnel.connectors.cdc.base.source.event.SnapshotSplitWatermark; @@ -29,6 +30,7 @@ import org.apache.seatunnel.connectors.cdc.base.source.split.state.IncrementalSplitState; import org.apache.seatunnel.connectors.cdc.base.source.split.state.SnapshotSplitState; import org.apache.seatunnel.connectors.cdc.base.source.split.state.SourceSplitStateBase; +import org.apache.seatunnel.connectors.cdc.debezium.DebeziumDeserializationSchema; import org.apache.seatunnel.connectors.seatunnel.common.source.reader.RecordEmitter; import org.apache.seatunnel.connectors.seatunnel.common.source.reader.RecordsWithSplitIds; import org.apache.seatunnel.connectors.seatunnel.common.source.reader.SingleThreadMultiplexSourceReaderBase; @@ -38,6 +40,7 @@ import lombok.extern.slf4j.Slf4j; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -62,6 +65,7 @@ public class IncrementalSourceReader private final int subtaskId; private final C sourceConfig; + private final DebeziumDeserializationSchema debeziumDeserializationSchema; public IncrementalSourceReader( BlockingQueue> elementsQueue, @@ -69,7 +73,8 @@ public IncrementalSourceReader( RecordEmitter recordEmitter, SourceReaderOptions options, SourceReader.Context context, - C sourceConfig) { + C 
sourceConfig, + DebeziumDeserializationSchema debeziumDeserializationSchema) { super( elementsQueue, new SingleThreadFetcherManager<>(elementsQueue, splitReaderSupplier::get), @@ -79,6 +84,7 @@ public IncrementalSourceReader( this.sourceConfig = sourceConfig; this.finishedUnackedSplits = new HashMap<>(); this.subtaskId = context.getIndexOfSubtask(); + this.debeziumDeserializationSchema = debeziumDeserializationSchema; } @Override @@ -163,6 +169,15 @@ protected SourceSplitStateBase initializedState(SourceSplitBase split) { if (split.isSnapshotSplit()) { return new SnapshotSplitState(split.asSnapshotSplit()); } else { + IncrementalSplit incrementalSplit = split.asIncrementalSplit(); + if (incrementalSplit.getCheckpointDataType() != null) { + log.info( + "The incremental split[{}] has checkpoint datatype {} for restore.", + incrementalSplit.splitId(), + incrementalSplit.getCheckpointDataType()); + debeziumDeserializationSchema.restoreCheckpointProducedType( + incrementalSplit.getCheckpointDataType()); + } return new IncrementalSplitState(split.asIncrementalSplit()); } } @@ -180,6 +195,10 @@ public List snapshotState(long checkpointId) { // add finished snapshot splits that didn't receive ack yet unfinishedSplits.addAll(finishedUnackedSplits.values()); + if (isIncrementalSplitPhase(unfinishedSplits)) { + return snapshotCheckpointDataType(unfinishedSplits); + } + return unfinishedSplits; } @@ -187,4 +206,25 @@ public List snapshotState(long checkpointId) { protected SourceSplitBase toSplitType(String splitId, SourceSplitStateBase splitState) { return splitState.toSourceSplit(); } + + private boolean isIncrementalSplitPhase(List stateSplits) { + return stateSplits.size() == 1 && stateSplits.get(0).isIncrementalSplit(); + } + + private List snapshotCheckpointDataType(List stateSplits) { + if (!isIncrementalSplitPhase(stateSplits)) { + throw new IllegalStateException( + "The splits should be incremental split when snapshot checkpoint datatype"); + } + IncrementalSplit 
incrementalSplit = stateSplits.get(0).asIncrementalSplit(); + // Snapshot current datatype to checkpoint + SeaTunnelDataType checkpointDataType = debeziumDeserializationSchema.getProducedType(); + IncrementalSplit newIncrementalSplit = + new IncrementalSplit(incrementalSplit, checkpointDataType); + log.debug( + "Snapshot checkpoint datatype {} into split[{}] state.", + checkpointDataType, + incrementalSplit.splitId()); + return Arrays.asList(newIncrementalSplit); + } } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceRecordEmitter.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceRecordEmitter.java index 2f8409b99a3a..eacb427acbcb 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceRecordEmitter.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceRecordEmitter.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.api.common.metrics.Counter; import org.apache.seatunnel.api.common.metrics.MetricsContext; import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import org.apache.seatunnel.connectors.cdc.base.source.offset.Offset; import org.apache.seatunnel.connectors.cdc.base.source.offset.OffsetFactory; import org.apache.seatunnel.connectors.cdc.base.source.split.SourceRecords; @@ -37,6 +38,8 @@ import static org.apache.seatunnel.connectors.cdc.base.source.split.wartermark.WatermarkEvent.isHighWatermarkEvent; import static org.apache.seatunnel.connectors.cdc.base.source.split.wartermark.WatermarkEvent.isLowWatermarkEvent; +import static 
org.apache.seatunnel.connectors.cdc.base.source.split.wartermark.WatermarkEvent.isSchemaChangeAfterWatermarkEvent; +import static org.apache.seatunnel.connectors.cdc.base.source.split.wartermark.WatermarkEvent.isSchemaChangeBeforeWatermarkEvent; import static org.apache.seatunnel.connectors.cdc.base.source.split.wartermark.WatermarkEvent.isWatermarkEvent; import static org.apache.seatunnel.connectors.cdc.base.utils.SourceRecordUtils.getFetchTimestamp; import static org.apache.seatunnel.connectors.cdc.base.utils.SourceRecordUtils.getMessageTimestamp; @@ -110,9 +113,12 @@ protected void processElement( Offset watermark = getWatermark(element); if (isLowWatermarkEvent(element) && splitState.isSnapshotSplitState()) { splitState.asSnapshotSplitState().setLowWatermark(watermark); - } - if (isHighWatermarkEvent(element) && splitState.isSnapshotSplitState()) { + } else if (isHighWatermarkEvent(element) && splitState.isSnapshotSplitState()) { splitState.asSnapshotSplitState().setHighWatermark(watermark); + } else if ((isSchemaChangeBeforeWatermarkEvent(element) + || isSchemaChangeAfterWatermarkEvent(element)) + && splitState.isIncrementalSplitState()) { + emitElement(element, output); } } else if (isSchemaChangeEvent(element) && splitState.isIncrementalSplitState()) { emitElement(element, output); @@ -157,9 +163,24 @@ public void collect(T record) { output.collect(record); } + @Override + public void collect(SchemaChangeEvent event) { + output.collect(event); + } + + @Override + public void markSchemaChangeBeforeCheckpoint() { + output.markSchemaChangeBeforeCheckpoint(); + } + + @Override + public void markSchemaChangeAfterCheckpoint() { + output.markSchemaChangeAfterCheckpoint(); + } + @Override public Object getCheckpointLock() { - return null; + return output.getCheckpointLock(); } } } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceSplitReader.java 
b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceSplitReader.java index 932b5f0e4e96..53f97362734f 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceSplitReader.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/IncrementalSourceSplitReader.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.common.utils.SeaTunnelException; import org.apache.seatunnel.connectors.cdc.base.config.SourceConfig; import org.apache.seatunnel.connectors.cdc.base.dialect.DataSourceDialect; +import org.apache.seatunnel.connectors.cdc.base.schema.SchemaChangeResolver; import org.apache.seatunnel.connectors.cdc.base.source.reader.external.FetchTask; import org.apache.seatunnel.connectors.cdc.base.source.reader.external.Fetcher; import org.apache.seatunnel.connectors.cdc.base.source.reader.external.IncrementalSourceScanFetcher; @@ -50,13 +51,18 @@ public class IncrementalSourceSplitReader private String currentSplitId; private final DataSourceDialect dataSourceDialect; private final C sourceConfig; + private final SchemaChangeResolver schemaChangeResolver; public IncrementalSourceSplitReader( - int subtaskId, DataSourceDialect dataSourceDialect, C sourceConfig) { + int subtaskId, + DataSourceDialect dataSourceDialect, + C sourceConfig, + SchemaChangeResolver schemaChangeResolver) { this.subtaskId = subtaskId; this.splits = new ArrayDeque<>(); this.dataSourceDialect = dataSourceDialect; this.sourceConfig = sourceConfig; + this.schemaChangeResolver = schemaChangeResolver; } @Override @@ -133,7 +139,9 @@ protected void checkSplitOrStartNext() throws IOException { } final FetchTask.Context taskContext = dataSourceDialect.createFetchTaskContext(nextSplit, sourceConfig); - currentFetcher = new 
IncrementalSourceStreamFetcher(taskContext, subtaskId); + currentFetcher = + new IncrementalSourceStreamFetcher( + taskContext, subtaskId, schemaChangeResolver); log.info("Stream fetcher is created."); } currentFetcher.submitTask(dataSourceDialect.createFetchTask(nextSplit)); diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/external/IncrementalSourceStreamFetcher.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/external/IncrementalSourceStreamFetcher.java index 2b8e9f7725fd..31fdaaf2e50a 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/external/IncrementalSourceStreamFetcher.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/external/IncrementalSourceStreamFetcher.java @@ -18,11 +18,14 @@ package org.apache.seatunnel.connectors.cdc.base.source.reader.external; import org.apache.seatunnel.common.utils.SeaTunnelException; +import org.apache.seatunnel.connectors.cdc.base.schema.SchemaChangeResolver; import org.apache.seatunnel.connectors.cdc.base.source.offset.Offset; import org.apache.seatunnel.connectors.cdc.base.source.split.CompletedSnapshotSplitInfo; import org.apache.seatunnel.connectors.cdc.base.source.split.IncrementalSplit; import org.apache.seatunnel.connectors.cdc.base.source.split.SourceRecords; import org.apache.seatunnel.connectors.cdc.base.source.split.SourceSplitBase; +import org.apache.seatunnel.connectors.cdc.base.source.split.wartermark.WatermarkEvent; +import org.apache.seatunnel.connectors.cdc.base.utils.SourceRecordUtils; import org.apache.kafka.connect.source.SourceRecord; @@ -33,6 +36,7 @@ import lombok.extern.slf4j.Slf4j; import java.util.ArrayList; +import java.util.Collections; import 
java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -53,6 +57,7 @@ @Slf4j public class IncrementalSourceStreamFetcher implements Fetcher { private final FetchTask.Context taskContext; + private final SchemaChangeResolver schemaChangeResolver; private final ExecutorService executorService; // has entered pure binlog mode private final Set pureBinlogPhaseTables; @@ -72,8 +77,12 @@ public class IncrementalSourceStreamFetcher implements Fetcher pollSplitRecords() throws InterruptedException, SeaTunnelException { checkReadException(); - final List sourceRecords = new ArrayList<>(); + + Iterator sourceRecordsIterator = Collections.emptyIterator(); if (streamFetchTask.isRunning()) { List batch = queue.poll(); - for (DataChangeEvent event : batch) { + if (!batch.isEmpty()) { + if (schemaChangeResolver != null) { + sourceRecordsIterator = splitSchemaChangeStream(batch); + } else { + sourceRecordsIterator = splitNormalStream(batch); + } + } + } + return sourceRecordsIterator; + } + + private Iterator splitNormalStream(List batchEvents) { + List sourceRecords = new ArrayList<>(); + if (streamFetchTask.isRunning()) { + for (DataChangeEvent event : batchEvents) { if (shouldEmit(event.getRecord())) { sourceRecords.add(event.getRecord()); } @@ -125,6 +149,92 @@ public Iterator pollSplitRecords() return sourceRecordsSet.iterator(); } + /** + * Split schema change stream. + * + *

    For example 1: + * + *

    Before event batch: [a, b, c, SchemaChangeEvent-1, SchemaChangeEvent-2, d, e] + * + *

    After event batch: [a, b, c, checkpoint-before] [SchemaChangeEvent-1, SchemaChangeEvent-2, + * checkpoint-after] [d, e] + * + *

    For example 2: + * + *

    Before event batch: [SchemaChangeEvent-1, SchemaChangeEvent-2, a, b, c, d, e] + * + *

    After event batch: [checkpoint-before] [SchemaChangeEvent-1, SchemaChangeEvent-2, + * checkpoint-after] [a, b, c, d, e] + */ + private Iterator splitSchemaChangeStream(List batchEvents) { + List sourceRecordsSet = new ArrayList<>(); + + List sourceRecordList = new ArrayList<>(); + SourceRecord previousRecord = null; + for (int i = 0; i < batchEvents.size(); i++) { + DataChangeEvent event = batchEvents.get(i); + SourceRecord currentRecord = event.getRecord(); + if (!shouldEmit(currentRecord)) { + continue; + } + if (!SourceRecordUtils.isDataChangeRecord(currentRecord) + && !SourceRecordUtils.isSchemaChangeEvent(currentRecord)) { + sourceRecordList.add(currentRecord); + continue; + } + + if (SourceRecordUtils.isSchemaChangeEvent(currentRecord)) { + if (!schemaChangeResolver.support(currentRecord)) { + continue; + } + + if (previousRecord == null) { + // add schema-change-before to first + sourceRecordList.add( + WatermarkEvent.createSchemaChangeBeforeWatermark(currentRecord)); + sourceRecordsSet.add(new SourceRecords(sourceRecordList)); + sourceRecordList = new ArrayList<>(); + sourceRecordList.add(currentRecord); + } else if (SourceRecordUtils.isSchemaChangeEvent(previousRecord)) { + sourceRecordList.add(currentRecord); + } else { + sourceRecordList.add( + WatermarkEvent.createSchemaChangeBeforeWatermark(currentRecord)); + sourceRecordsSet.add(new SourceRecords(sourceRecordList)); + sourceRecordList = new ArrayList<>(); + sourceRecordList.add(currentRecord); + } + } else if (SourceRecordUtils.isDataChangeRecord(currentRecord)) { + if (previousRecord == null + || SourceRecordUtils.isDataChangeRecord(previousRecord)) { + sourceRecordList.add(currentRecord); + } else { + sourceRecordList.add( + WatermarkEvent.createSchemaChangeAfterWatermark(currentRecord)); + sourceRecordsSet.add(new SourceRecords(sourceRecordList)); + sourceRecordList = new ArrayList<>(); + sourceRecordList.add(currentRecord); + } + } + previousRecord = currentRecord; + if (i == 
batchEvents.size() - 1) { + if (SourceRecordUtils.isSchemaChangeEvent(currentRecord)) { + sourceRecordList.add( + WatermarkEvent.createSchemaChangeAfterWatermark(currentRecord)); + } + sourceRecordsSet.add(new SourceRecords(sourceRecordList)); + } + } + + if (sourceRecordsSet.size() > 1) { + log.debug( + "Split events stream into {} batches and mark schema checkpoint before/after", + sourceRecordsSet.size()); + } + + return sourceRecordsSet.iterator(); + } + private void checkReadException() { if (readException != null) { throw new SeaTunnelException( diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/external/JdbcSourceFetchTaskContext.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/external/JdbcSourceFetchTaskContext.java index 70741aa6192f..025b3aafee0f 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/external/JdbcSourceFetchTaskContext.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/reader/external/JdbcSourceFetchTaskContext.java @@ -76,7 +76,6 @@ public boolean isRecordBetween(SourceRecord record, Object[] splitStart, Object[ return SourceRecordUtils.splitKeyRangeContains(key, splitStart, splitEnd); } - @SuppressWarnings("checkstyle:MissingSwitchDefault") @Override public void rewriteOutputBuffer( Map outputBuffer, SourceRecord changeRecord) { diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/IncrementalSplit.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/IncrementalSplit.java index a5e6a9cbacdb..640e173682ac 100644 --- 
a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/IncrementalSplit.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/IncrementalSplit.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.connectors.cdc.base.source.split; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.connectors.cdc.base.source.offset.Offset; import io.debezium.relational.TableId; @@ -43,16 +44,39 @@ public class IncrementalSplit extends SourceSplitBase { */ private final List completedSnapshotSplitInfos; + private final SeaTunnelDataType checkpointDataType; + public IncrementalSplit( String splitId, List capturedTables, Offset startupOffset, Offset stopOffset, List completedSnapshotSplitInfos) { + this(splitId, capturedTables, startupOffset, stopOffset, completedSnapshotSplitInfos, null); + } + + public IncrementalSplit(IncrementalSplit split, SeaTunnelDataType checkpointDataType) { + this( + split.splitId(), + split.getTableIds(), + split.getStartupOffset(), + split.getStopOffset(), + split.getCompletedSnapshotSplitInfos(), + checkpointDataType); + } + + public IncrementalSplit( + String splitId, + List capturedTables, + Offset startupOffset, + Offset stopOffset, + List completedSnapshotSplitInfos, + SeaTunnelDataType checkpointDataType) { super(splitId); this.tableIds = capturedTables; this.startupOffset = startupOffset; this.stopOffset = stopOffset; this.completedSnapshotSplitInfos = completedSnapshotSplitInfos; + this.checkpointDataType = checkpointDataType; } } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/wartermark/WatermarkEvent.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/wartermark/WatermarkEvent.java index 
1a120cb3b5c3..4e2a81f3f5c9 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/wartermark/WatermarkEvent.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/wartermark/WatermarkEvent.java @@ -72,6 +72,28 @@ public static SourceRecord create( signalRecordValue(splitId, watermarkKind)); } + public static SourceRecord createSchemaChangeBeforeWatermark(SourceRecord record) { + return new SourceRecord( + record.sourcePartition(), + record.sourceOffset(), + record.topic(), + SIGNAL_EVENT_KEY_SCHEMA, + signalRecordKey("schema-change-before"), + SIGNAL_EVENT_VALUE_SCHEMA, + signalRecordValue("schema-change-before", WatermarkKind.SCHEMA_CHANGE_BEFORE)); + } + + public static SourceRecord createSchemaChangeAfterWatermark(SourceRecord record) { + return new SourceRecord( + record.sourcePartition(), + record.sourceOffset(), + record.topic(), + SIGNAL_EVENT_KEY_SCHEMA, + signalRecordKey("schema-change-after"), + SIGNAL_EVENT_VALUE_SCHEMA, + signalRecordValue("schema-change-after", WatermarkKind.SCHEMA_CHANGE_AFTER)); + } + public static boolean isWatermarkEvent(SourceRecord record) { Optional watermarkKind = getWatermarkKind(record); return watermarkKind.isPresent(); @@ -92,6 +114,18 @@ public static boolean isEndWatermarkEvent(SourceRecord record) { return watermarkKind.isPresent() && watermarkKind.get() == WatermarkKind.END; } + public static boolean isSchemaChangeBeforeWatermarkEvent(SourceRecord record) { + Optional watermarkKind = getWatermarkKind(record); + return watermarkKind.isPresent() + && watermarkKind.get() == WatermarkKind.SCHEMA_CHANGE_BEFORE; + } + + public static boolean isSchemaChangeAfterWatermarkEvent(SourceRecord record) { + Optional watermarkKind = getWatermarkKind(record); + return watermarkKind.isPresent() + && watermarkKind.get() == WatermarkKind.SCHEMA_CHANGE_AFTER; + } + private 
static Optional getWatermarkKind(SourceRecord record) { if (record.valueSchema() != null && SIGNAL_EVENT_VALUE_SCHEMA_NAME.equals(record.valueSchema().name())) { diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/wartermark/WatermarkKind.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/wartermark/WatermarkKind.java index 334bd2c0e41f..cc6a2ee14acf 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/wartermark/WatermarkKind.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/source/split/wartermark/WatermarkKind.java @@ -21,6 +21,8 @@ public enum WatermarkKind { LOW, HIGH, + SCHEMA_CHANGE_BEFORE, + SCHEMA_CHANGE_AFTER, END; public WatermarkKind fromString(String kindString) { @@ -28,6 +30,10 @@ public WatermarkKind fromString(String kindString) { return LOW; } else if (HIGH.name().equalsIgnoreCase(kindString)) { return HIGH; + } else if (SCHEMA_CHANGE_BEFORE.name().equalsIgnoreCase(kindString)) { + return SCHEMA_CHANGE_BEFORE; + } else if (SCHEMA_CHANGE_AFTER.name().equalsIgnoreCase(kindString)) { + return SCHEMA_CHANGE_AFTER; } else { return END; } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/utils/SourceRecordUtils.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/utils/SourceRecordUtils.java index e172b389b4a9..872669eacd33 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/utils/SourceRecordUtils.java +++ 
b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/base/utils/SourceRecordUtils.java @@ -17,12 +17,14 @@ package org.apache.seatunnel.connectors.cdc.base.utils; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; +import io.debezium.connector.AbstractSourceInfo; import io.debezium.data.Envelope; import io.debezium.document.DocumentReader; import io.debezium.relational.TableId; @@ -193,4 +195,18 @@ private static boolean isNumericObject(Object obj) { private static BigDecimal toBigDecimal(Object numericObj) { return new BigDecimal(numericObj.toString()); } + + public static TablePath getTablePath(SourceRecord record) { + Struct messageStruct = (Struct) record.value(); + Struct sourceStruct = messageStruct.getStruct(Envelope.FieldName.SOURCE); + String databaseName = sourceStruct.getString(AbstractSourceInfo.DATABASE_NAME_KEY); + String tableName = sourceStruct.getString(AbstractSourceInfo.TABLE_NAME_KEY); + String schemaName = null; + try { + schemaName = sourceStruct.getString(AbstractSourceInfo.SCHEMA_NAME_KEY); + } catch (Throwable e) { + // ignore + } + return TablePath.of(databaseName, schemaName, tableName); + } } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/DebeziumDeserializationSchema.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/DebeziumDeserializationSchema.java index 8cf300376550..8e8cb3c09c26 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/DebeziumDeserializationSchema.java +++ 
b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/DebeziumDeserializationSchema.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.source.Collector; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.connectors.cdc.base.schema.SchemaChangeResolver; import org.apache.kafka.connect.source.SourceRecord; @@ -36,4 +37,10 @@ public interface DebeziumDeserializationSchema extends Serializable { void deserialize(SourceRecord record, Collector out) throws Exception; SeaTunnelDataType getProducedType(); + + default void restoreCheckpointProducedType(SeaTunnelDataType checkpointDataType) {} + + default SchemaChangeResolver getSchemaChangeResolver() { + return null; + } } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/row/SeaTunnelRowDebeziumDeserializationConverters.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/row/SeaTunnelRowDebeziumDeserializationConverters.java index bffd3ee43806..bec86250dda6 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/row/SeaTunnelRowDebeziumDeserializationConverters.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/row/SeaTunnelRowDebeziumDeserializationConverters.java @@ -210,6 +210,8 @@ public Object convert(Object dbzObj, Schema schema) { return dbzObj; } else if (dbzObj instanceof BigDecimal) { return ((BigDecimal) dbzObj).byteValue(); + } else if (dbzObj instanceof Boolean) { + return Boolean.TRUE.equals(dbzObj) ? 
Byte.valueOf("1") : Byte.valueOf("0"); } else { return Byte.parseByte(dbzObj.toString()); } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/row/SeaTunnelRowDebeziumDeserializeSchema.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/row/SeaTunnelRowDebeziumDeserializeSchema.java index 2997fc78d625..ea0a3fc13e74 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/row/SeaTunnelRowDebeziumDeserializeSchema.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-base/src/main/java/org/apache/seatunnel/connectors/cdc/debezium/row/SeaTunnelRowDebeziumDeserializeSchema.java @@ -19,11 +19,16 @@ import org.apache.seatunnel.api.source.Collector; import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; +import org.apache.seatunnel.api.table.event.handler.DataTypeChangeEventDispatcher; +import org.apache.seatunnel.api.table.event.handler.DataTypeChangeEventHandler; import org.apache.seatunnel.api.table.type.MultipleRowType; import org.apache.seatunnel.api.table.type.RowKind; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.cdc.base.schema.SchemaChangeResolver; +import org.apache.seatunnel.connectors.cdc.base.utils.SourceRecordUtils; import org.apache.seatunnel.connectors.cdc.debezium.DebeziumDeserializationConverterFactory; import org.apache.seatunnel.connectors.cdc.debezium.DebeziumDeserializationSchema; import org.apache.seatunnel.connectors.cdc.debezium.MetadataConverter; @@ -32,135 +37,160 @@ import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; -import 
io.debezium.connector.AbstractSourceInfo; import io.debezium.data.Envelope; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; -import java.io.Serializable; import java.time.ZoneId; -import java.util.Collections; import java.util.HashMap; import java.util.Map; import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.seatunnel.connectors.cdc.base.source.split.wartermark.WatermarkEvent.isSchemaChangeAfterWatermarkEvent; +import static org.apache.seatunnel.connectors.cdc.base.source.split.wartermark.WatermarkEvent.isSchemaChangeBeforeWatermarkEvent; import static org.apache.seatunnel.connectors.cdc.base.utils.SourceRecordUtils.isDataChangeRecord; +import static org.apache.seatunnel.connectors.cdc.base.utils.SourceRecordUtils.isSchemaChangeEvent; /** Deserialization schema from Debezium object to {@link SeaTunnelRow}. */ @Slf4j public final class SeaTunnelRowDebeziumDeserializeSchema implements DebeziumDeserializationSchema { private static final long serialVersionUID = 1L; + private static final String DEFAULT_TABLE_NAME_KEY = null; - /** TypeInformation of the produced {@link SeaTunnelRow}. * */ - private final SeaTunnelDataType resultTypeInfo; - - /** - * Runtime converter that converts Kafka {@link SourceRecord}s into {@link SeaTunnelRow} - * consisted of - */ - private final SeaTunnelRowDebeziumDeserializationConverters singleTableRowConverter; - - private final Map - multipleTableRowConverters; - - /** Validator to validate the row value. */ - private final ValueValidator validator; - - /** Returns a builder to build {@link SeaTunnelRowDebeziumDeserializeSchema}. 
*/ - public static Builder builder() { - return new Builder(); - } + private final MetadataConverter[] metadataConverters; + private final ZoneId serverTimeZone; + private final DebeziumDeserializationConverterFactory userDefinedConverterFactory; + private final SchemaChangeResolver schemaChangeResolver; + private final DataTypeChangeEventHandler dataTypeChangeEventHandler; + private SeaTunnelDataType resultTypeInfo; + private Map tableRowConverters; SeaTunnelRowDebeziumDeserializeSchema( SeaTunnelDataType physicalDataType, MetadataConverter[] metadataConverters, SeaTunnelDataType resultType, - ValueValidator validator, ZoneId serverTimeZone, - DebeziumDeserializationConverterFactory userDefinedConverterFactory) { - - SeaTunnelRowDebeziumDeserializationConverters singleTableRowConverter = null; - Map multipleTableRowConverters = - Collections.emptyMap(); - if (physicalDataType instanceof MultipleRowType) { - multipleTableRowConverters = new HashMap<>(); - for (Map.Entry item : (MultipleRowType) physicalDataType) { - SeaTunnelRowDebeziumDeserializationConverters itemRowConverter = - new SeaTunnelRowDebeziumDeserializationConverters( - item.getValue(), - metadataConverters, - serverTimeZone, - userDefinedConverterFactory); - multipleTableRowConverters.put(item.getKey(), itemRowConverter); - } - } else { - singleTableRowConverter = - new SeaTunnelRowDebeziumDeserializationConverters( - (SeaTunnelRowType) physicalDataType, - metadataConverters, - serverTimeZone, - userDefinedConverterFactory); - } - this.singleTableRowConverter = singleTableRowConverter; - this.multipleTableRowConverters = multipleTableRowConverters; + DebeziumDeserializationConverterFactory userDefinedConverterFactory, + SchemaChangeResolver schemaChangeResolver) { + this.metadataConverters = metadataConverters; + this.serverTimeZone = serverTimeZone; + this.userDefinedConverterFactory = userDefinedConverterFactory; this.resultTypeInfo = checkNotNull(resultType); - this.validator = 
checkNotNull(validator); + this.schemaChangeResolver = schemaChangeResolver; + this.dataTypeChangeEventHandler = new DataTypeChangeEventDispatcher(); + this.tableRowConverters = + createTableRowConverters( + resultType, + metadataConverters, + serverTimeZone, + userDefinedConverterFactory); } @Override public void deserialize(SourceRecord record, Collector collector) throws Exception { - if (!isDataChangeRecord(record)) { - log.debug("Unsupported record {}, just skip.", record); + if (isSchemaChangeBeforeWatermarkEvent(record)) { + collector.markSchemaChangeBeforeCheckpoint(); + return; + } + if (isSchemaChangeAfterWatermarkEvent(record)) { + collector.markSchemaChangeAfterCheckpoint(); + return; + } + if (isSchemaChangeEvent(record)) { + deserializeSchemaChangeRecord(record, collector); + return; + } + + if (isDataChangeRecord(record)) { + deserializeDataChangeRecord(record, collector); + return; + } + + log.debug("Unsupported record {}, just skip.", record); + } + + private void deserializeSchemaChangeRecord( + SourceRecord record, Collector collector) { + SchemaChangeEvent schemaChangeEvent = schemaChangeResolver.resolve(record, resultTypeInfo); + if (schemaChangeEvent == null) { + log.info("Unsupported resolve schemaChangeEvent {}, just skip.", record); return; } + if (resultTypeInfo instanceof MultipleRowType) { + Map newRowTypeMap = new HashMap<>(); + for (Map.Entry entry : (MultipleRowType) resultTypeInfo) { + if (!entry.getKey().equals(schemaChangeEvent.tablePath().toString())) { + newRowTypeMap.put(entry.getKey(), entry.getValue()); + continue; + } + + log.debug("Table[{}] datatype change before: {}", entry.getKey(), entry.getValue()); + SeaTunnelRowType newRowType = + dataTypeChangeEventHandler.reset(entry.getValue()).apply(schemaChangeEvent); + newRowTypeMap.put(entry.getKey(), newRowType); + log.debug("Table[{}] datatype change after: {}", entry.getKey(), newRowType); + } + resultTypeInfo = new MultipleRowType(newRowTypeMap); + } else { + 
log.debug("Table datatype change before: {}", resultTypeInfo); + resultTypeInfo = + dataTypeChangeEventHandler + .reset((SeaTunnelRowType) resultTypeInfo) + .apply(schemaChangeEvent); + log.debug("table datatype change after: {}", resultTypeInfo); + } + + tableRowConverters = + createTableRowConverters( + resultTypeInfo, + metadataConverters, + serverTimeZone, + userDefinedConverterFactory); + + collector.collect(schemaChangeEvent); + } + + private void deserializeDataChangeRecord(SourceRecord record, Collector collector) + throws Exception { Envelope.Operation operation = Envelope.operationFor(record); Struct messageStruct = (Struct) record.value(); Schema valueSchema = record.valueSchema(); - - Struct sourceStruct = messageStruct.getStruct(Envelope.FieldName.SOURCE); - String databaseName = sourceStruct.getString(AbstractSourceInfo.DATABASE_NAME_KEY); - String tableName = sourceStruct.getString(AbstractSourceInfo.TABLE_NAME_KEY); - String schemaName = null; - try { - schemaName = sourceStruct.getString(AbstractSourceInfo.SCHEMA_NAME_KEY); - } catch (Throwable e) { - // ignore - } - String tableId = TablePath.of(databaseName, schemaName, tableName).toString(); + TablePath tablePath = SourceRecordUtils.getTablePath(record); + String tableId = tablePath.toString(); SeaTunnelRowDebeziumDeserializationConverters converters; - if (!multipleTableRowConverters.isEmpty()) { - converters = multipleTableRowConverters.get(tableId); + if (resultTypeInfo instanceof MultipleRowType) { + converters = tableRowConverters.get(tableId); if (converters == null) { log.debug("Ignore newly added table {}", tableId); return; } } else { - converters = singleTableRowConverter; + converters = tableRowConverters.get(DEFAULT_TABLE_NAME_KEY); } if (operation == Envelope.Operation.CREATE || operation == Envelope.Operation.READ) { SeaTunnelRow insert = extractAfterRow(converters, record, messageStruct, valueSchema); insert.setRowKind(RowKind.INSERT); insert.setTableId(tableId); - 
validator.validate(insert, RowKind.INSERT); collector.collect(insert); } else if (operation == Envelope.Operation.DELETE) { SeaTunnelRow delete = extractBeforeRow(converters, record, messageStruct, valueSchema); - validator.validate(delete, RowKind.DELETE); delete.setRowKind(RowKind.DELETE); delete.setTableId(tableId); collector.collect(delete); } else { SeaTunnelRow before = extractBeforeRow(converters, record, messageStruct, valueSchema); - validator.validate(before, RowKind.UPDATE_BEFORE); before.setRowKind(RowKind.UPDATE_BEFORE); before.setTableId(tableId); collector.collect(before); SeaTunnelRow after = extractAfterRow(converters, record, messageStruct, valueSchema); - validator.validate(after, RowKind.UPDATE_AFTER); after.setRowKind(RowKind.UPDATE_AFTER); after.setTableId(tableId); collector.collect(after); @@ -196,64 +226,106 @@ public SeaTunnelDataType getProducedType() { return resultTypeInfo; } - // ------------------------------------------------------------------------------------- - // Builder - // ------------------------------------------------------------------------------------- + @Override + public SchemaChangeResolver getSchemaChangeResolver() { + return schemaChangeResolver; + } + + @Override + public void restoreCheckpointProducedType(SeaTunnelDataType checkpointDataType) { + if (!checkpointDataType.getSqlType().equals(resultTypeInfo.getSqlType())) { + throw new IllegalStateException( + String.format( + "The produced type %s of the SeaTunnel deserialization schema " + + "doesn't match the type %s of the restored snapshot.", + resultTypeInfo.getSqlType(), checkpointDataType.getSqlType())); + } + if (checkpointDataType instanceof MultipleRowType) { + MultipleRowType latestDataType = (MultipleRowType) resultTypeInfo; + Map newRowTypeMap = new HashMap<>(); + for (Map.Entry entry : latestDataType) { + newRowTypeMap.put(entry.getKey(), entry.getValue()); + } + for (Map.Entry entry : (MultipleRowType) checkpointDataType) { + SeaTunnelRowType 
oldDataType = latestDataType.getRowType(entry.getKey()); + if (oldDataType == null) { + log.info("Ignore restore table[{}] datatype has been deleted.", entry.getKey()); + continue; + } + + log.info("Table[{}] datatype restore before: {}", entry.getKey(), oldDataType); + newRowTypeMap.put(entry.getKey(), entry.getValue()); + log.info("Table[{}] datatype restore after: {}", entry.getKey(), entry.getValue()); + } + resultTypeInfo = new MultipleRowType(newRowTypeMap); + } else { + log.info("Table datatype restore before: {}", resultTypeInfo); + resultTypeInfo = checkpointDataType; + log.info("Table datatype restore after: {}", checkpointDataType); + } + tableRowConverters = + createTableRowConverters( + resultTypeInfo, + metadataConverters, + serverTimeZone, + userDefinedConverterFactory); + } + + private static Map + createTableRowConverters( + SeaTunnelDataType inputDataType, + MetadataConverter[] metadataConverters, + ZoneId serverTimeZone, + DebeziumDeserializationConverterFactory userDefinedConverterFactory) { + Map tableRowConverters = + new HashMap<>(); + if (inputDataType instanceof MultipleRowType) { + for (Map.Entry item : (MultipleRowType) inputDataType) { + SeaTunnelRowDebeziumDeserializationConverters itemRowConverter = + new SeaTunnelRowDebeziumDeserializationConverters( + item.getValue(), + metadataConverters, + serverTimeZone, + userDefinedConverterFactory); + tableRowConverters.put(item.getKey(), itemRowConverter); + } + return tableRowConverters; + } + + SeaTunnelRowDebeziumDeserializationConverters tableRowConverter = + new SeaTunnelRowDebeziumDeserializationConverters( + (SeaTunnelRowType) inputDataType, + metadataConverters, + serverTimeZone, + userDefinedConverterFactory); + tableRowConverters.put(DEFAULT_TABLE_NAME_KEY, tableRowConverter); + return tableRowConverters; + } - /** Custom validator to validate the row value. 
*/ - public interface ValueValidator extends Serializable { - void validate(SeaTunnelRow rowData, RowKind rowKind) throws Exception; + public static Builder builder() { + return new Builder(); } - /** Builder of {@link SeaTunnelRowDebeziumDeserializeSchema}. */ + @Setter + @Accessors(chain = true) + @NoArgsConstructor(access = AccessLevel.PRIVATE) public static class Builder { private SeaTunnelDataType physicalRowType; private SeaTunnelDataType resultTypeInfo; private MetadataConverter[] metadataConverters = new MetadataConverter[0]; - private ValueValidator validator = (rowData, rowKind) -> {}; private ZoneId serverTimeZone = ZoneId.systemDefault(); private DebeziumDeserializationConverterFactory userDefinedConverterFactory = DebeziumDeserializationConverterFactory.DEFAULT; - - public Builder setPhysicalRowType(SeaTunnelDataType physicalRowType) { - this.physicalRowType = physicalRowType; - return this; - } - - public Builder setMetadataConverters(MetadataConverter[] metadataConverters) { - this.metadataConverters = metadataConverters; - return this; - } - - public Builder setResultTypeInfo(SeaTunnelDataType resultTypeInfo) { - this.resultTypeInfo = resultTypeInfo; - return this; - } - - public Builder setValueValidator(ValueValidator validator) { - this.validator = validator; - return this; - } - - public Builder setServerTimeZone(ZoneId serverTimeZone) { - this.serverTimeZone = serverTimeZone; - return this; - } - - public Builder setUserDefinedConverterFactory( - DebeziumDeserializationConverterFactory userDefinedConverterFactory) { - this.userDefinedConverterFactory = userDefinedConverterFactory; - return this; - } + private SchemaChangeResolver schemaChangeResolver; public SeaTunnelRowDebeziumDeserializeSchema build() { return new SeaTunnelRowDebeziumDeserializeSchema( physicalRowType, metadataConverters, resultTypeInfo, - validator, serverTimeZone, - userDefinedConverterFactory); + userDefinedConverterFactory, + schemaChangeResolver); } } } diff --git 
a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/sender/MongoDBConnectorDeserializationSchema.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/sender/MongoDBConnectorDeserializationSchema.java index 75f3564c6c6a..6f36f4be830b 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/sender/MongoDBConnectorDeserializationSchema.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/sender/MongoDBConnectorDeserializationSchema.java @@ -65,6 +65,7 @@ import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.ENCODE_VALUE_FIELD; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.FULL_DOCUMENT; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.ID_FIELD; +import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.utils.MongodbRecordUtils.extractBsonDocument; import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; public class MongoDBConnectorDeserializationSchema @@ -154,17 +155,6 @@ private SeaTunnelRow extractRowData(BsonDocument document) { return (SeaTunnelRow) physicalConverter.convert(document); } - private BsonDocument extractBsonDocument( - Struct value, @Nonnull Schema valueSchema, String fieldName) { - if (valueSchema.field(fieldName) != null) { - String docString = value.getString(fieldName); - if (docString != null) { - return BsonDocument.parse(docString); - } - } - return null; - } - // ------------------------------------------------------------------------------------- // Runtime Converters // 
------------------------------------------------------------------------------------- diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/source/fetch/MongodbFetchTaskContext.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/source/fetch/MongodbFetchTaskContext.java index 534baa72abda..fa0931a80704 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/source/fetch/MongodbFetchTaskContext.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/source/fetch/MongodbFetchTaskContext.java @@ -27,10 +27,13 @@ import org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.source.offset.ChangeStreamOffset; import org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.utils.MongodbRecordUtils; +import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; import org.bson.BsonDocument; +import org.bson.BsonInt64; +import org.bson.BsonString; import org.bson.BsonType; import org.bson.BsonValue; @@ -50,12 +53,21 @@ import java.util.stream.Collectors; import static org.apache.seatunnel.common.exception.CommonErrorCode.ILLEGAL_ARGUMENT; +import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.COLL_FIELD; +import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.DB_FIELD; +import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.DOCUMENT_KEY; +import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.FULL_DOCUMENT; +import static 
org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.ID_FIELD; +import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.NS_FIELD; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.OPERATION_TYPE; +import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.OPERATION_TYPE_INSERT; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.SNAPSHOT_FIELD; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.SNAPSHOT_TRUE; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.SOURCE_FIELD; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.config.MongodbSourceOptions.TS_MS_FIELD; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.utils.BsonUtils.compareBsonValue; +import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.utils.MongodbRecordUtils.buildSourceRecord; +import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.utils.MongodbRecordUtils.extractBsonDocument; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.utils.MongodbRecordUtils.getDocumentKey; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.utils.MongodbRecordUtils.getResumeToken; import static org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.utils.MongodbUtils.createMongoClient; @@ -139,11 +151,11 @@ public boolean isDataChangeRecord(SourceRecord record) { public boolean isRecordBetween( SourceRecord record, @Nonnull Object[] splitStart, @Nonnull Object[] splitEnd) { BsonDocument documentKey = getDocumentKey(record); - BsonDocument splitKeys = (BsonDocument) ((Object[]) splitStart[0])[0]; + BsonDocument splitKeys = (BsonDocument) splitStart[0]; String firstKey = splitKeys.getFirstKey(); BsonValue keyValue = documentKey.get(firstKey); - 
BsonValue lowerBound = ((BsonDocument) ((Object[]) splitEnd[0])[1]).get(firstKey); - BsonValue upperBound = ((BsonDocument) ((Object[]) splitEnd[0])[1]).get(firstKey); + BsonValue lowerBound = ((BsonDocument) splitStart[1]).get(firstKey); + BsonValue upperBound = ((BsonDocument) splitEnd[1]).get(firstKey); if (isFullRange(lowerBound, upperBound)) { return true; @@ -172,9 +184,27 @@ public void rewriteOutputBuffer( switch (OperationType.fromString(operationType)) { case INSERT: + outputBuffer.put(key, changeRecord); + break; case UPDATE: case REPLACE: - outputBuffer.put(key, changeRecord); + Schema valueSchema = changeRecord.valueSchema(); + BsonDocument fullDocument = + extractBsonDocument(value, valueSchema, FULL_DOCUMENT); + if (fullDocument == null) { + break; + } + BsonDocument valueDocument = normalizeSnapshotDocument(fullDocument, value); + SourceRecord record = + buildSourceRecord( + changeRecord.sourcePartition(), + changeRecord.sourceOffset(), + changeRecord.topic(), + changeRecord.kafkaPartition(), + changeRecord.keySchema(), + changeRecord.key(), + valueDocument); + outputBuffer.put(key, record); break; case DELETE: outputBuffer.remove(key); @@ -202,6 +232,30 @@ record -> { .collect(Collectors.toList()); } + private BsonDocument normalizeSnapshotDocument( + @Nonnull final BsonDocument fullDocument, Struct value) { + return new BsonDocument() + .append(ID_FIELD, new BsonString(value.getString(DOCUMENT_KEY))) + .append(OPERATION_TYPE, new BsonString(OPERATION_TYPE_INSERT)) + .append( + NS_FIELD, + new BsonDocument( + DB_FIELD, + new BsonString( + value.getStruct(NS_FIELD).getString(DB_FIELD))) + .append( + COLL_FIELD, + new BsonString( + value.getStruct(NS_FIELD).getString(COLL_FIELD)))) + .append(DOCUMENT_KEY, new BsonString(value.getString(DOCUMENT_KEY))) + .append(FULL_DOCUMENT, fullDocument) + .append(TS_MS_FIELD, new BsonInt64(value.getInt64(TS_MS_FIELD))) + .append( + SOURCE_FIELD, + new BsonDocument(SNAPSHOT_FIELD, new BsonString(SNAPSHOT_TRUE)) + 
.append(TS_MS_FIELD, new BsonInt64(0L))); + } + @Override public void close() { Runtime.getRuntime() diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/utils/MongodbRecordUtils.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/utils/MongodbRecordUtils.java index c4d51c59e419..1e9ab5772294 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/utils/MongodbRecordUtils.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mongodb/utils/MongodbRecordUtils.java @@ -18,6 +18,7 @@ package org.apache.seatunnel.connectors.seatunnel.cdc.mongodb.utils; import org.apache.commons.lang3.StringUtils; +import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; @@ -66,7 +67,18 @@ public static BsonDocument getResumeToken(SourceRecord sourceRecord) { public static BsonDocument getDocumentKey(@Nonnull SourceRecord sourceRecord) { Struct value = (Struct) sourceRecord.value(); - return BsonDocument.parse(value.getString(DOCUMENT_KEY)); + return extractBsonDocument(value, sourceRecord.valueSchema(), DOCUMENT_KEY); + } + + public static BsonDocument extractBsonDocument( + Struct value, @Nonnull Schema valueSchema, String fieldName) { + if (valueSchema.field(fieldName) != null) { + String docString = value.getString(fieldName); + if (docString != null) { + return BsonDocument.parse(docString); + } + } + return null; } public static String getOffsetValue(@Nonnull SourceRecord sourceRecord, String key) { @@ -139,6 +151,30 @@ public static String getOffsetValue(@Nonnull SourceRecord sourceRecord, String k 
valueSchemaAndValue.value()); } + public static @Nonnull SourceRecord buildSourceRecord( + Map sourcePartition, + Map sourceOffset, + String topicName, + Integer partition, + Schema keySchema, + Object key, + BsonDocument valueDocument) { + BsonValueToSchemaAndValue schemaAndValue = + new BsonValueToSchemaAndValue(new DefaultJson().getJsonWriterSettings()); + SchemaAndValue valueSchemaAndValue = + schemaAndValue.toSchemaAndValue(fromJson(OUTPUT_SCHEMA), valueDocument); + + return new SourceRecord( + sourcePartition, + sourceOffset, + topicName, + partition, + keySchema, + key, + valueSchemaAndValue.schema(), + valueSchemaAndValue.value()); + } + public static @Nonnull Map createSourceOffsetMap( @Nonnull BsonDocument idDocument, boolean isSnapshotRecord) { Map sourceOffset = new HashMap<>(); diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/config/ServerIdRange.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/config/ServerIdRange.java index c3319f8a8e9e..a012cf4c4b49 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/config/ServerIdRange.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/config/ServerIdRange.java @@ -21,7 +21,7 @@ import java.io.Serializable; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; /** * This class defines a range of server id. The boundaries of the range are inclusive. 
diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java index 396fd7bae9d4..6429fa4b5299 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java @@ -68,8 +68,13 @@ public OptionRule optionRule() { JdbcSourceOptions.CONNECTION_POOL_SIZE, JdbcSourceOptions.CHUNK_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND, JdbcSourceOptions.CHUNK_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND, - JdbcSourceOptions.SAMPLE_SHARDING_THRESHOLD) + JdbcSourceOptions.SAMPLE_SHARDING_THRESHOLD, + JdbcSourceOptions.INVERSE_SAMPLING_RATE) .optional(MySqlSourceOptions.STARTUP_MODE, MySqlSourceOptions.STOP_MODE) + .conditional( + MySqlSourceOptions.STARTUP_MODE, + StartupMode.INITIAL, + SourceOptions.EXACTLY_ONCE) .conditional( MySqlSourceOptions.STARTUP_MODE, StartupMode.SPECIFIC, @@ -80,18 +85,6 @@ public OptionRule optionRule() { StopMode.SPECIFIC, SourceOptions.STOP_SPECIFIC_OFFSET_FILE, SourceOptions.STOP_SPECIFIC_OFFSET_POS) - .conditional( - MySqlSourceOptions.STARTUP_MODE, - StartupMode.TIMESTAMP, - SourceOptions.STARTUP_TIMESTAMP) - .conditional( - MySqlSourceOptions.STOP_MODE, - StopMode.TIMESTAMP, - SourceOptions.STOP_TIMESTAMP) - .conditional( - MySqlSourceOptions.STARTUP_MODE, - StartupMode.INITIAL, - SourceOptions.EXACTLY_ONCE) .build(); } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlSourceOptions.java 
b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlSourceOptions.java index 43f3f4c70cc8..bc59fd0f5c16 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlSourceOptions.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlSourceOptions.java @@ -34,18 +34,22 @@ public class MySqlSourceOptions { Arrays.asList( StartupMode.INITIAL, StartupMode.EARLIEST, - StartupMode.LATEST)) + StartupMode.LATEST, + StartupMode.SPECIFIC)) .defaultValue(StartupMode.INITIAL) .withDescription( "Optional startup mode for CDC source, valid enumerations are " - + "\"initial\", \"earliest\", \"latest\", \"timestamp\"\n or \"specific\""); + + "\"initial\", \"earliest\", \"latest\" or \"specific\""); public static final SingleChoiceOption STOP_MODE = (SingleChoiceOption) Options.key(SourceOptions.STOP_MODE_KEY) - .singleChoice(StopMode.class, Arrays.asList(StopMode.NEVER)) + .singleChoice( + StopMode.class, + Arrays.asList( + StopMode.LATEST, StopMode.SPECIFIC, StopMode.NEVER)) .defaultValue(StopMode.NEVER) .withDescription( "Optional stop mode for CDC source, valid enumerations are " - + "\"never\", \"latest\", \"timestamp\"\n or \"specific\""); + + "\"never\", \"latest\" or \"specific\""); } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/eumerator/MySqlChunkSplitter.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/eumerator/MySqlChunkSplitter.java index 0249889b239c..c078f7cf28c7 100644 --- 
a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/eumerator/MySqlChunkSplitter.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/eumerator/MySqlChunkSplitter.java @@ -28,10 +28,12 @@ import io.debezium.jdbc.JdbcConnection; import io.debezium.relational.Column; import io.debezium.relational.TableId; +import lombok.extern.slf4j.Slf4j; import java.sql.SQLException; /** The {@code ChunkSplitter} used to split table into a set of chunks for JDBC data source. */ +@Slf4j public class MySqlChunkSplitter extends AbstractJdbcSourceChunkSplitter { public MySqlChunkSplitter(JdbcSourceConfig sourceConfig, JdbcDataSourceDialect dialect) { @@ -55,7 +57,7 @@ public Object queryMin( public Object[] sampleDataFromColumn( JdbcConnection jdbc, TableId tableId, String columnName, int inverseSamplingRate) throws SQLException { - return MySqlUtils.sampleDataFromColumn(jdbc, tableId, columnName, inverseSamplingRate); + return MySqlUtils.skipReadAndSortSampleData(jdbc, tableId, columnName, inverseSamplingRate); } @Override diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/offset/BinlogOffset.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/offset/BinlogOffset.java index 0d91c02fee7a..f94401ba3454 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/offset/BinlogOffset.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/offset/BinlogOffset.java @@ -204,7 +204,6 @@ public int compareTo(Offset offset) { return Long.compare(this.getRestartSkipRows(), 
that.getRestartSkipRows()); } - @SuppressWarnings("checkstyle:EqualsHashCode") @Override public boolean equals(Object o) { if (this == o) { diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlConnectionUtils.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlConnectionUtils.java index 4cf26d5a82ca..3a63c5d09013 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlConnectionUtils.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlConnectionUtils.java @@ -73,7 +73,6 @@ public static MySqlDatabaseSchema createMySqlDatabaseSchema( } /** Fetch earliest binlog offsets in MySql Server. */ - @SuppressWarnings("checkstyle:MagicNumber") public static BinlogOffset earliestBinlogOffset(JdbcConnection jdbc) { final String showMasterStmt = "SHOW MASTER LOGS"; JdbcConnection.ResultSetMapper getCurrentBinlogOffset = @@ -87,7 +86,6 @@ public static BinlogOffset earliestBinlogOffset(JdbcConnection jdbc) { } /** Fetch current binlog offsets in MySql Server. 
*/ - @SuppressWarnings("checkstyle:MagicNumber") public static BinlogOffset currentBinlogOffset(JdbcConnection jdbc) { final String showMasterStmt = "SHOW MASTER STATUS"; JdbcConnection.ResultSetMapper getCurrentBinlogOffset = diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlTypeUtils.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlTypeUtils.java index 267476b3ffea..00c10f53cb5e 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlTypeUtils.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlTypeUtils.java @@ -50,10 +50,14 @@ public class MySqlTypeUtils { private static final String MYSQL_BIGINT_UNSIGNED = "BIGINT UNSIGNED"; private static final String MYSQL_DECIMAL = "DECIMAL"; private static final String MYSQL_DECIMAL_UNSIGNED = "DECIMAL UNSIGNED"; + private static final String MYSQL_NUMERIC = "NUMERIC"; + private static final String MYSQL_NUMERIC_UNSIGNED = "NUMERIC UNSIGNED"; private static final String MYSQL_FLOAT = "FLOAT"; private static final String MYSQL_FLOAT_UNSIGNED = "FLOAT UNSIGNED"; private static final String MYSQL_DOUBLE = "DOUBLE"; private static final String MYSQL_DOUBLE_UNSIGNED = "DOUBLE UNSIGNED"; + private static final String MYSQL_REAL = "REAL"; + private static final String MYSQL_REAL_UNSIGNED = "REAL UNSIGNED"; // -------------------------string---------------------------- private static final String MYSQL_CHAR = "CHAR"; @@ -63,6 +67,7 @@ public class MySqlTypeUtils { private static final String MYSQL_TEXT = "TEXT"; private static final String MYSQL_LONGTEXT = "LONGTEXT"; private static final String MYSQL_JSON = "JSON"; + private static final String MYSQL_ENUM = 
"ENUM"; // ------------------------------time------------------------- private static final String MYSQL_DATE = "DATE"; @@ -80,7 +85,6 @@ public class MySqlTypeUtils { private static final String MYSQL_VARBINARY = "VARBINARY"; private static final String MYSQL_GEOMETRY = "GEOMETRY"; - @SuppressWarnings("checkstyle:MagicNumber") public static SeaTunnelDataType convertFromColumn(Column column) { String typeName = column.typeName(); switch (typeName) { @@ -90,6 +94,7 @@ public static SeaTunnelDataType convertFromColumn(Column column) { return column.length() == 1 ? BasicType.BOOLEAN_TYPE : BasicType.INT_TYPE; case MYSQL_TINYINT_UNSIGNED: case MYSQL_SMALLINT: + return BasicType.SHORT_TYPE; case MYSQL_SMALLINT_UNSIGNED: case MYSQL_MEDIUMINT: case MYSQL_MEDIUMINT_UNSIGNED: @@ -104,6 +109,9 @@ public static SeaTunnelDataType convertFromColumn(Column column) { case MYSQL_BIGINT_UNSIGNED: return new DecimalType(20, 0); case MYSQL_DECIMAL: + case MYSQL_DECIMAL_UNSIGNED: + case MYSQL_NUMERIC: + case MYSQL_NUMERIC_UNSIGNED: return new DecimalType(column.length(), column.scale().orElse(0)); case MYSQL_FLOAT: return BasicType.FLOAT_TYPE; @@ -111,8 +119,10 @@ public static SeaTunnelDataType convertFromColumn(Column column) { log.warn("{} will probably cause value overflow.", MYSQL_FLOAT_UNSIGNED); return BasicType.FLOAT_TYPE; case MYSQL_DOUBLE: + case MYSQL_REAL: return BasicType.DOUBLE_TYPE; case MYSQL_DOUBLE_UNSIGNED: + case MYSQL_REAL_UNSIGNED: log.warn("{} will probably cause value overflow.", MYSQL_DOUBLE_UNSIGNED); return BasicType.DOUBLE_TYPE; case MYSQL_CHAR: @@ -121,6 +131,7 @@ public static SeaTunnelDataType convertFromColumn(Column column) { case MYSQL_TEXT: case MYSQL_VARCHAR: case MYSQL_JSON: + case MYSQL_ENUM: return BasicType.STRING_TYPE; case MYSQL_LONGTEXT: log.warn( diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlUtils.java 
b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlUtils.java index c9223c81ff21..3dde38b42253 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlUtils.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlUtils.java @@ -36,11 +36,13 @@ import io.debezium.relational.TableId; import io.debezium.schema.TopicSelector; import io.debezium.util.SchemaNameAdjuster; +import lombok.extern.slf4j.Slf4j; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -52,6 +54,7 @@ import static org.apache.seatunnel.connectors.cdc.base.utils.SourceRecordUtils.rowToArray; /** Utils to prepare MySQL SQL statement. 
*/ +@Slf4j public class MySqlUtils { private MySqlUtils() {} @@ -76,7 +79,6 @@ public static Object[] queryMinMax(JdbcConnection jdbc, TableId tableId, String }); } - @SuppressWarnings("checkstyle:MagicNumber") public static long queryApproximateRowCnt(JdbcConnection jdbc, TableId tableId) throws SQLException { // The statement used to get approximate row count which is less @@ -142,6 +144,56 @@ public static Object[] sampleDataFromColumn( }); } + public static Object[] skipReadAndSortSampleData( + JdbcConnection jdbc, TableId tableId, String columnName, int inverseSamplingRate) + throws SQLException { + final String sampleQuery = + String.format("SELECT %s FROM %s", quote(columnName), quote(tableId)); + + Statement stmt = null; + ResultSet rs = null; + + List results = new ArrayList<>(); + try { + stmt = + jdbc.connection() + .createStatement( + ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + + stmt.setFetchSize(Integer.MIN_VALUE); + rs = stmt.executeQuery(sampleQuery); + + int count = 0; + while (rs.next()) { + count++; + if (count % 100000 == 0) { + log.info("Processing row index: {}", count); + } + if (count % inverseSamplingRate == 0) { + results.add(rs.getObject(1)); + } + } + } finally { + if (rs != null) { + try { + rs.close(); + } catch (SQLException e) { + log.error("Failed to close ResultSet", e); + } + } + if (stmt != null) { + try { + stmt.close(); + } catch (SQLException e) { + log.error("Failed to close Statement", e); + } + } + } + Object[] resultsArray = results.toArray(); + Arrays.sort(resultsArray); + return resultsArray; + } + public static Object queryNextChunkMax( JdbcConnection jdbc, TableId tableId, diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/TableDiscoveryUtils.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/TableDiscoveryUtils.java index 
151dcca2f3bb..0f635c5c7a8d 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/TableDiscoveryUtils.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/TableDiscoveryUtils.java @@ -48,7 +48,10 @@ public static List listTables(JdbcConnection jdbc, RelationalTableFilte "SHOW DATABASES", rs -> { while (rs.next()) { - databaseNames.add(rs.getString(1)); + String databaseName = rs.getString(1); + if (tableFilters.databaseFilter().test(databaseName)) { + databaseNames.add(databaseName); + } } }); LOG.info("\t list of available databases is: {}", databaseNames); diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/source/eumerator/SqlServerChunkSplitter.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/source/eumerator/SqlServerChunkSplitter.java index 7efd53dc3fc2..1dc97020be5d 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/source/eumerator/SqlServerChunkSplitter.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/source/eumerator/SqlServerChunkSplitter.java @@ -57,7 +57,8 @@ public Object queryMin( public Object[] sampleDataFromColumn( JdbcConnection jdbc, TableId tableId, String columnName, int inverseSamplingRate) throws SQLException { - return SqlServerUtils.sampleDataFromColumn(jdbc, tableId, columnName, inverseSamplingRate); + return SqlServerUtils.skipReadAndSortSampleData( + jdbc, tableId, columnName, inverseSamplingRate); } @Override diff --git 
a/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/utils/SqlServerUtils.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/utils/SqlServerUtils.java index a1271849843c..d6e58825dabe 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/utils/SqlServerUtils.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-sqlserver/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/sqlserver/source/utils/SqlServerUtils.java @@ -39,10 +39,13 @@ import io.debezium.relational.TableId; import io.debezium.schema.TopicSelector; import io.debezium.util.SchemaNameAdjuster; +import lombok.extern.slf4j.Slf4j; import java.sql.Connection; import java.sql.PreparedStatement; +import java.sql.ResultSet; import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -52,6 +55,7 @@ import java.util.Optional; /** The utils for SqlServer data source. 
*/ +@Slf4j public class SqlServerUtils { public SqlServerUtils() {} @@ -145,6 +149,56 @@ public static Object[] sampleDataFromColumn( }); } + public static Object[] skipReadAndSortSampleData( + JdbcConnection jdbc, TableId tableId, String columnName, int inverseSamplingRate) + throws SQLException { + final String sampleQuery = + String.format("SELECT %s FROM %s", quote(columnName), quote(tableId)); + + Statement stmt = null; + ResultSet rs = null; + + List results = new ArrayList<>(); + try { + stmt = + jdbc.connection() + .createStatement( + ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + + stmt.setFetchSize(Integer.MIN_VALUE); + rs = stmt.executeQuery(sampleQuery); + + int count = 0; + while (rs.next()) { + count++; + if (count % 100000 == 0) { + log.info("Processing row index: {}", count); + } + if (count % inverseSamplingRate == 0) { + results.add(rs.getObject(1)); + } + } + } finally { + if (rs != null) { + try { + rs.close(); + } catch (SQLException e) { + log.error("Failed to close ResultSet", e); + } + } + if (stmt != null) { + try { + stmt.close(); + } catch (SQLException e) { + log.error("Failed to close Statement", e); + } + } + } + Object[] resultsArray = results.toArray(); + Arrays.sort(resultsArray); + return resultsArray; + } + /** * Returns the next LSN to be read from the database. This is the LSN of the last record that * was read from the database. 
diff --git a/seatunnel-connectors-v2/connector-cdc/pom.xml b/seatunnel-connectors-v2/connector-cdc/pom.xml index 38f52b1680d7..3519b1c51c08 100644 --- a/seatunnel-connectors-v2/connector-cdc/pom.xml +++ b/seatunnel-connectors-v2/connector-cdc/pom.xml @@ -38,5 +38,42 @@ 1.6.4.Final + 4.8 + + + + + org.antlr + antlr4 + ${antlr.version} + + + + + + + + + org.antlr + antlr4-maven-plugin + ${antlr.version} + + src/main/antlr4 + src/main/java + true + true + true + + + + + antlr4 + + + + + + + diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ClickhouseConfig.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ClickhouseConfig.java index f7c8e032ccf3..bb0417b17123 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ClickhouseConfig.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/config/ClickhouseConfig.java @@ -25,7 +25,6 @@ import java.util.List; import java.util.Map; -@SuppressWarnings("checkstyle:MagicNumber") public class ClickhouseConfig { /** Bulk size of clickhouse jdbc */ diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java index 360c59259882..fe1b25e9092e 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSink.java @@ -79,7 +79,6 @@ public String getPluginName() { return 
"Clickhouse"; } - @SuppressWarnings("checkstyle:MagicNumber") @Override public void prepare(Config config) throws PrepareFailException { CheckResult result = diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java index 235279b4d5a5..6220e4b80712 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java @@ -90,7 +90,14 @@ public void write(SeaTunnelRow element) throws IOException { @Override public Optional prepareCommit() throws IOException { - flush(); + for (ClickhouseBatchStatement batchStatement : statementMap.values()) { + JdbcBatchStatementExecutor statement = batchStatement.getJdbcBatchStatementExecutor(); + IntHolder intHolder = batchStatement.getIntHolder(); + if (intHolder.getValue() > 0) { + flush(statement); + intHolder.setValue(0); + } + } return Optional.empty(); } @@ -208,7 +215,8 @@ private static boolean clickhouseServerEnableExperimentalLightweightDelete( } return false; } catch (SQLException e) { - throw new ClickhouseConnectorException(CommonErrorCode.SQL_OPERATION_FAILED, e); + log.warn("Failed to get clickhouse server config: {}", configKey, e); + return false; } } } diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/executor/FieldNamedPreparedStatement.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/executor/FieldNamedPreparedStatement.java index 
58c7ce650b58..5fa82e8c59a8 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/executor/FieldNamedPreparedStatement.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/executor/FieldNamedPreparedStatement.java @@ -46,8 +46,8 @@ import java.util.List; import java.util.Map; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; @RequiredArgsConstructor public class FieldNamedPreparedStatement implements PreparedStatement { diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/source/AbstractSingleSplitSource.java b/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/source/AbstractSingleSplitSource.java index fb4c5c85277a..360e8601a225 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/source/AbstractSingleSplitSource.java +++ b/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/source/AbstractSingleSplitSource.java @@ -23,7 +23,7 @@ import org.apache.seatunnel.api.source.SourceReader; import org.apache.seatunnel.api.source.SourceSplitEnumerator; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; public abstract class AbstractSingleSplitSource implements SeaTunnelSource { diff --git 
a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java index 036a5d802f4a..49957b99e215 100644 --- a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.shade.com.typesafe.config.Config; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.sink.SeaTunnelSink; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; @@ -30,13 +31,20 @@ import com.google.auto.service.AutoService; import lombok.NoArgsConstructor; +import static org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkFactory.LOG_PRINT_DATA; +import static org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkFactory.LOG_PRINT_DELAY; + @NoArgsConstructor @AutoService(SeaTunnelSink.class) public class ConsoleSink extends AbstractSimpleSink { private SeaTunnelRowType seaTunnelRowType; + private boolean isPrintData = true; + private int delayMs = 0; - public ConsoleSink(SeaTunnelRowType seaTunnelRowType) { + public ConsoleSink(SeaTunnelRowType seaTunnelRowType, ReadonlyConfig options) { this.seaTunnelRowType = seaTunnelRowType; + this.isPrintData = options.get(LOG_PRINT_DATA); + this.delayMs = options.get(LOG_PRINT_DELAY); } @Override @@ -51,7 +59,7 @@ public SeaTunnelDataType getConsumedType() { @Override public AbstractSinkWriter createWriter(SinkWriter.Context context) { - return new ConsoleSinkWriter(seaTunnelRowType, context); + return new ConsoleSinkWriter(seaTunnelRowType, context, isPrintData, delayMs); } @Override @@ -60,5 +68,8 
@@ public String getPluginName() { } @Override - public void prepare(Config pluginConfig) {} + public void prepare(Config pluginConfig) { + this.isPrintData = ReadonlyConfig.fromConfig(pluginConfig).get(LOG_PRINT_DATA); + this.delayMs = ReadonlyConfig.fromConfig(pluginConfig).get(LOG_PRINT_DELAY); + } } diff --git a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java index 1e0450d66c7b..5a66493aee5e 100644 --- a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java @@ -17,6 +17,9 @@ package org.apache.seatunnel.connectors.seatunnel.console.sink; +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -27,6 +30,21 @@ @AutoService(Factory.class) public class ConsoleSinkFactory implements TableSinkFactory { + + public static final Option LOG_PRINT_DATA = + Options.key("log.print.data") + .booleanType() + .defaultValue(true) + .withDescription( + "Flag to determine whether data should be printed in the logs."); + + public static final Option LOG_PRINT_DELAY = + Options.key("log.print.delay.ms") + .intType() + .defaultValue(0) + .withDescription( + "Delay in milliseconds between printing each data item to the logs."); + @Override public String factoryIdentifier() { return "Console"; @@ -39,7 +57,10 @@ public OptionRule optionRule() { @Override 
public TableSink createSink(TableFactoryContext context) { + ReadonlyConfig options = context.getOptions(); return () -> - new ConsoleSink(context.getCatalogTable().getTableSchema().toPhysicalRowDataType()); + new ConsoleSink( + context.getCatalogTable().getTableSchema().toPhysicalRowDataType(), + options); } } diff --git a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java index debd6284ec15..c8c6c945ff1b 100644 --- a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java @@ -18,10 +18,14 @@ package org.apache.seatunnel.connectors.seatunnel.console.sink; import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; +import org.apache.seatunnel.api.table.event.handler.DataTypeChangeEventDispatcher; +import org.apache.seatunnel.api.table.event.handler.DataTypeChangeEventHandler; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.common.utils.JsonUtils; +import org.apache.seatunnel.common.utils.SeaTunnelException; import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import org.apache.commons.lang3.StringUtils; @@ -36,18 +40,35 @@ @Slf4j public class ConsoleSinkWriter extends AbstractSinkWriter { - private final SeaTunnelRowType seaTunnelRowType; - public final AtomicLong rowCounter = new AtomicLong(0); - public SinkWriter.Context context; + private SeaTunnelRowType seaTunnelRowType; + private final 
AtomicLong rowCounter = new AtomicLong(0); + private final SinkWriter.Context context; + private final DataTypeChangeEventHandler dataTypeChangeEventHandler; - public ConsoleSinkWriter(SeaTunnelRowType seaTunnelRowType, SinkWriter.Context context) { + boolean isPrintData = true; + int delayMs = 0; + + public ConsoleSinkWriter( + SeaTunnelRowType seaTunnelRowType, + SinkWriter.Context context, + boolean isPrintData, + int delayMs) { this.seaTunnelRowType = seaTunnelRowType; this.context = context; + this.isPrintData = isPrintData; + this.delayMs = delayMs; + this.dataTypeChangeEventHandler = new DataTypeChangeEventDispatcher(); log.info("output rowType: {}", fieldsInfo(seaTunnelRowType)); } @Override - @SuppressWarnings("checkstyle:RegexpSingleline") + public void applySchemaChange(SchemaChangeEvent event) { + log.info("changed rowType before: {}", fieldsInfo(seaTunnelRowType)); + seaTunnelRowType = dataTypeChangeEventHandler.reset(seaTunnelRowType).apply(event); + log.info("changed rowType after: {}", fieldsInfo(seaTunnelRowType)); + } + + @Override public void write(SeaTunnelRow element) { String[] arr = new String[seaTunnelRowType.getTotalFields()]; SeaTunnelDataType[] fieldTypes = seaTunnelRowType.getFieldTypes(); @@ -55,13 +76,23 @@ public void write(SeaTunnelRow element) { for (int i = 0; i < fieldTypes.length; i++) { arr[i] = fieldToString(fieldTypes[i], fields[i]); } - log.info( - "subtaskIndex={} rowIndex={}: SeaTunnelRow#tableId={} SeaTunnelRow#kind={} : {}", - context.getIndexOfSubtask(), - rowCounter.incrementAndGet(), - element.getTableId(), - element.getRowKind(), - StringUtils.join(arr, ", ")); + if (isPrintData) { + log.info( + "subtaskIndex={} rowIndex={}: SeaTunnelRow#tableId={} SeaTunnelRow#kind={} : {}", + context.getIndexOfSubtask(), + rowCounter.incrementAndGet(), + element.getTableId(), + element.getRowKind(), + StringUtils.join(arr, ", ")); + } + if (delayMs > 0) { + try { + Thread.sleep(delayMs); + } catch (InterruptedException e) { + 
Thread.currentThread().interrupt(); + throw new SeaTunnelException(e); + } + } } @Override diff --git a/seatunnel-connectors-v2/connector-console/src/test/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriterIT.java b/seatunnel-connectors-v2/connector-console/src/test/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriterIT.java index 0220c8896295..e03c00c49599 100644 --- a/seatunnel-connectors-v2/connector-console/src/test/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriterIT.java +++ b/seatunnel-connectors-v2/connector-console/src/test/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriterIT.java @@ -48,7 +48,7 @@ void setUp() { String[] fieldNames = {}; SeaTunnelDataType[] fieldTypes = {}; SeaTunnelRowType seaTunnelRowType = new SeaTunnelRowType(fieldNames, fieldTypes); - consoleSinkWriter = new ConsoleSinkWriter(seaTunnelRowType, null); + consoleSinkWriter = new ConsoleSinkWriter(seaTunnelRowType, null, true, 0); } private Object fieldToStringTest(SeaTunnelDataType dataType, Object value) { diff --git a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/config/SinkConfig.java b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/config/SinkConfig.java index 34899ea6e397..beccebe3c15f 100644 --- a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/config/SinkConfig.java +++ b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/config/SinkConfig.java @@ -51,14 +51,12 @@ public class SinkConfig { .withDescription( "Delimiter for composite keys (\"_\" by default), e.g., \"$\" would result in document `_id` \"KEY1$KEY2$KEY3\"."); - @SuppressWarnings("checkstyle:MagicNumber") public static final Option 
MAX_BATCH_SIZE = Options.key("max_batch_size") .intType() .defaultValue(10) .withDescription("batch bulk doc max size"); - @SuppressWarnings("checkstyle:MagicNumber") public static final Option MAX_RETRY_COUNT = Options.key("max_retry_count") .intType() diff --git a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/config/SourceConfig.java b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/config/SourceConfig.java index f5c885ed3bd5..81c32bdf15e6 100644 --- a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/config/SourceConfig.java +++ b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/config/SourceConfig.java @@ -47,7 +47,6 @@ public class SourceConfig { .withDescription( "Amount of time Elasticsearch will keep the search context alive for scroll requests"); - @SuppressWarnings("checkstyle:MagicNumber") public static final Option SCROLL_SIZE = Options.key("scroll_size") .intType() diff --git a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/config/FakeOption.java b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/config/FakeOption.java index 34aa9069c59d..d16d5c4f5939 100644 --- a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/config/FakeOption.java +++ b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/config/FakeOption.java @@ -23,7 +23,6 @@ import java.util.List; -@SuppressWarnings("checkstyle:MagicNumber") public class FakeOption { public static final Option> ROWS = diff --git 
a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java index f140eb972eae..2c4449d21f99 100644 --- a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java +++ b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java @@ -122,7 +122,6 @@ public LocalTime randomLocalTime() { return randomLocalDateTime().toLocalTime(); } - @SuppressWarnings("checkstyle:MagicNumber") public LocalDateTime randomLocalDateTime() { int year; int month; diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/BaseFileSinkWriter.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/BaseFileSinkWriter.java index 22200249f63d..a72c1b45dc58 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/BaseFileSinkWriter.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/BaseFileSinkWriter.java @@ -44,7 +44,6 @@ public class BaseFileSinkWriter implements SinkWriter> beingWrittenWriter; private AvroSchemaConverter schemaConverter; @@ -163,7 +162,6 @@ private ParquetWriter getOrCreateWriter(@NonNull String filePath) return writer; } - @SuppressWarnings("checkstyle:MagicNumber") private Object resolveObject(Object data, SeaTunnelDataType seaTunnelDataType) { if (data == null) { return null; @@ -222,7 +220,6 @@ private Object resolveObject(Object data, SeaTunnelDataType seaTunnelDataType } } - 
@SuppressWarnings("checkstyle:MagicNumber") public static Type seaTunnelDataType2ParquetDataType( String fieldName, SeaTunnelDataType seaTunnelDataType) { switch (seaTunnelDataType.getSqlType()) { diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java index e4e1694f30dc..1dc8e7a2ad96 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java @@ -24,8 +24,6 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSourceConfig; import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorErrorCode; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException; import org.apache.seatunnel.connectors.seatunnel.file.sink.util.FileSystemUtils; import org.apache.hadoop.conf.Configuration; @@ -153,15 +151,9 @@ public List getFileNamesByPath(HadoopConf hadoopConf, String path) throw } } } - - if (fileNames.isEmpty()) { - throw new FileConnectorException( - FileConnectorErrorCode.FILE_LIST_EMPTY, - "The target file list is empty," - + "SeaTunnel will not be able to sync empty table, " - + "please check the configuration parameters such as: [file_filter_pattern]"); + if (this.fileNames.isEmpty()) { + log.error("The current directory is empty " + path); } - return fileNames; } @@ -196,10 +188,12 @@ public SeaTunnelRowType 
getActualSeaTunnelRowTypeInfo() { protected Map parsePartitionsByPath(String path) { LinkedHashMap partitions = new LinkedHashMap<>(); - Arrays.stream(path.split("/", -1)) - .filter(split -> split.contains("=")) - .map(split -> split.split("=", -1)) - .forEach(kv -> partitions.put(kv[0], kv[1])); + if (path != null && !path.isEmpty()) { + Arrays.stream(path.split("/", -1)) + .filter(split -> split.contains("=")) + .map(split -> split.split("=", -1)) + .forEach(kv -> partitions.put(kv[0], kv[1])); + } return partitions; } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java index 0b1cfc083ba6..649216c752c7 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java @@ -136,7 +136,8 @@ public void setSeaTunnelRowTypeInfo(SeaTunnelRowType seaTunnelRowType) { "Schmea information is not set or incorrect schmea settings"); } SeaTunnelRowType userDefinedRowTypeWithPartition = - mergePartitionTypes(fileNames.get(0), seaTunnelRowType); + mergePartitionTypes( + fileNames.size() > 0 ? 
fileNames.get(0) : null, seaTunnelRowType); // column projection if (pluginConfig.hasPath(BaseSourceConfig.READ_COLUMNS.key())) { // get the read column index from user-defined row type diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/OrcReadStrategy.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/OrcReadStrategy.java index d191c3a839a2..a4a5d27b26b6 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/OrcReadStrategy.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/OrcReadStrategy.java @@ -162,7 +162,6 @@ public SeaTunnelRowType getSeaTunnelRowTypeInfo(HadoopConf hadoopConf, String pa } } - @SuppressWarnings("checkstyle:MagicNumber") @Override boolean checkFileType(String path) { try { diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/TextReadStrategy.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/TextReadStrategy.java index 4b931cb8902d..eefa8e4de765 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/TextReadStrategy.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/TextReadStrategy.java @@ -138,7 +138,8 @@ public SeaTunnelRowType getSeaTunnelRowTypeInfo(HadoopConf hadoopConf, String pa @Override public void setSeaTunnelRowTypeInfo(SeaTunnelRowType seaTunnelRowType) { SeaTunnelRowType 
userDefinedRowTypeWithPartition = - mergePartitionTypes(fileNames.get(0), seaTunnelRowType); + mergePartitionTypes( + fileNames.size() > 0 ? fileNames.get(0) : null, seaTunnelRowType); if (pluginConfig.hasPath(BaseSourceConfig.DELIMITER.key())) { fieldDelimiter = pluginConfig.getString(BaseSourceConfig.DELIMITER.key()); } else { diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/OrcReadStrategyTest.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/OrcReadStrategyTest.java index 5d72ae2e00cd..5e8eb9a2c8a5 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/OrcReadStrategyTest.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/OrcReadStrategyTest.java @@ -93,7 +93,6 @@ public List getRows() { return rows; } - @SuppressWarnings("checkstyle:RegexpSingleline") @Override public void collect(SeaTunnelRow record) { System.out.println(record); diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ParquetReadStrategyTest.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ParquetReadStrategyTest.java index de1d8d932922..82e0bac7410b 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ParquetReadStrategyTest.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/writer/ParquetReadStrategyTest.java @@ -129,7 +129,6 @@ public List getRows() { return rows; } - @SuppressWarnings("checkstyle:RegexpSingleline") 
@Override public void collect(SeaTunnelRow record) { System.out.println(record); diff --git a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/system/SFTPFileSystem.java b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/system/SFTPFileSystem.java index 9e551278b45a..555fb5bf9d7f 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/system/SFTPFileSystem.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/system/SFTPFileSystem.java @@ -208,7 +208,6 @@ private FileStatus getFileStatus(ChannelSftp client, Path file) throws IOExcepti return fileStat; } - @SuppressWarnings("checkstyle:MagicNumber") private FileStatus getFileStatus(ChannelSftp channel, LsEntry sftpFile, Path parentPath) throws IOException { diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/commit/HiveSinkAggregatedCommitter.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/commit/HiveSinkAggregatedCommitter.java index 7d7c271e1dac..4934cc2aa120 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/commit/HiveSinkAggregatedCommitter.java +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/commit/HiveSinkAggregatedCommitter.java @@ -34,11 +34,14 @@ import java.util.Map; import java.util.stream.Collectors; +import static org.apache.seatunnel.connectors.seatunnel.hive.config.HiveConfig.ABORT_DROP_PARTITION_METADATA; + @Slf4j public class HiveSinkAggregatedCommitter extends FileSinkAggregatedCommitter { private final Config pluginConfig; private final String dbName; 
private final String tableName; + private final boolean abortDropPartitionMetadata; public HiveSinkAggregatedCommitter( Config pluginConfig, String dbName, String tableName, FileSystemUtils fileSystemUtils) { @@ -46,6 +49,10 @@ public HiveSinkAggregatedCommitter( this.pluginConfig = pluginConfig; this.dbName = dbName; this.tableName = tableName; + this.abortDropPartitionMetadata = + pluginConfig.hasPath(ABORT_DROP_PARTITION_METADATA.key()) + ? pluginConfig.getBoolean(ABORT_DROP_PARTITION_METADATA.key()) + : ABORT_DROP_PARTITION_METADATA.defaultValue(); } @Override @@ -79,21 +86,23 @@ public List commit( @Override public void abort(List aggregatedCommitInfos) throws Exception { super.abort(aggregatedCommitInfos); - HiveMetaStoreProxy hiveMetaStore = HiveMetaStoreProxy.getInstance(pluginConfig); - for (FileAggregatedCommitInfo aggregatedCommitInfo : aggregatedCommitInfos) { - Map> partitionDirAndValuesMap = - aggregatedCommitInfo.getPartitionDirAndValuesMap(); - List partitions = - partitionDirAndValuesMap.keySet().stream() - .map(partition -> partition.replaceAll("\\\\", "/")) - .collect(Collectors.toList()); - try { - hiveMetaStore.dropPartitions(dbName, tableName, partitions); - log.info("Remove these partitions {}", partitions); - } catch (TException e) { - log.error("Failed to remove these partitions {}", partitions, e); + if (abortDropPartitionMetadata) { + HiveMetaStoreProxy hiveMetaStore = HiveMetaStoreProxy.getInstance(pluginConfig); + for (FileAggregatedCommitInfo aggregatedCommitInfo : aggregatedCommitInfos) { + Map> partitionDirAndValuesMap = + aggregatedCommitInfo.getPartitionDirAndValuesMap(); + List partitions = + partitionDirAndValuesMap.keySet().stream() + .map(partition -> partition.replaceAll("\\\\", "/")) + .collect(Collectors.toList()); + try { + hiveMetaStore.dropPartitions(dbName, tableName, partitions); + log.info("Remove these partitions {}", partitions); + } catch (TException e) { + log.error("Failed to remove these partitions {}", 
partitions, e); + } } + hiveMetaStore.close(); } - hiveMetaStore.close(); } } diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/HiveConfig.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/HiveConfig.java index 142863b5135e..8cf00b8c3072 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/HiveConfig.java +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/HiveConfig.java @@ -38,11 +38,19 @@ public class HiveConfig { .noDefaultValue() .withDescription("Hive metastore uri"); + public static final Option ABORT_DROP_PARTITION_METADATA = + Options.key("abort_drop_partition_metadata") + .booleanType() + .defaultValue(false) + .withDescription( + "Flag to decide whether to drop partition metadata from Hive Metastore during an abort operation. 
Note: this only affects the metadata in the metastore, the data in the partition will always be deleted(data generated during the synchronization process)."); + public static final Option HIVE_SITE_PATH = Options.key("hive_site_path") .stringType() .noDefaultValue() .withDescription("The path of hive-site.xml"); + public static final String TEXT_INPUT_FORMAT_CLASSNAME = "org.apache.hadoop.mapred.TextInputFormat"; public static final String TEXT_OUTPUT_FORMAT_CLASSNAME = diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkFactory.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkFactory.java index 6674b778c4a0..b98f6cffa502 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkFactory.java +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkFactory.java @@ -24,6 +24,8 @@ import com.google.auto.service.AutoService; +import static org.apache.seatunnel.connectors.seatunnel.hive.config.HiveConfig.ABORT_DROP_PARTITION_METADATA; + @AutoService(Factory.class) public class HiveSinkFactory implements TableSinkFactory { @Override @@ -36,6 +38,7 @@ public OptionRule optionRule() { return OptionRule.builder() .required(HiveConfig.TABLE_NAME) .required(HiveConfig.METASTORE_URI) + .optional(ABORT_DROP_PARTITION_METADATA) .build(); } } diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceFactory.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceFactory.java index d744ce2a14b4..383b26405d33 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceFactory.java +++ 
b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/source/HiveSourceFactory.java @@ -21,6 +21,7 @@ import org.apache.seatunnel.api.source.SeaTunnelSource; import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.api.table.factory.TableSourceFactory; +import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSourceConfig; import org.apache.seatunnel.connectors.seatunnel.hive.config.HiveConfig; import com.google.auto.service.AutoService; @@ -37,6 +38,8 @@ public OptionRule optionRule() { return OptionRule.builder() .required(HiveConfig.TABLE_NAME) .required(HiveConfig.METASTORE_URI) + .optional(BaseSourceConfig.READ_PARTITIONS) + .optional(BaseSourceConfig.READ_COLUMNS) .build(); } diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveMetaStoreProxy.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveMetaStoreProxy.java index f6ba5cfb12cd..788fe38dc1cd 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveMetaStoreProxy.java +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveMetaStoreProxy.java @@ -35,6 +35,8 @@ import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import java.io.File; +import java.net.MalformedURLException; import java.util.List; import java.util.Objects; @@ -54,10 +56,11 @@ private HiveMetaStoreProxy(Config config) { Configuration configuration = new Configuration(); FileSystemUtils.doKerberosAuthentication(configuration, principal, keytabPath); } - if (config.hasPath(HiveConfig.HIVE_SITE_PATH.key())) { - hiveConf.addResource(config.getString(HiveConfig.HIVE_SITE_PATH.key())); - } try { + if (config.hasPath(HiveConfig.HIVE_SITE_PATH.key())) { + String hiveSitePath = 
config.getString(HiveConfig.HIVE_SITE_PATH.key()); + hiveConf.addResource(new File(hiveSitePath).toURI().toURL()); + } hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf); } catch (MetaException e) { String errorMsg = @@ -67,6 +70,14 @@ private HiveMetaStoreProxy(Config config) { metastoreUri); throw new HiveConnectorException( HiveConnectorErrorCode.INITIALIZE_HIVE_METASTORE_CLIENT_FAILED, errorMsg, e); + } catch (MalformedURLException e) { + String errorMsg = + String.format( + "Using this hive uris [%s], hive conf [%s] to initialize " + + "hive metastore client instance failed", + metastoreUri, config.getString(HiveConfig.HIVE_SITE_PATH.key())); + throw new HiveConnectorException( + HiveConnectorErrorCode.INITIALIZE_HIVE_METASTORE_CLIENT_FAILED, errorMsg, e); } } diff --git a/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/client/HttpClientProvider.java b/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/client/HttpClientProvider.java index ef5b1f77d6bd..2c6fe67b797b 100644 --- a/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/client/HttpClientProvider.java +++ b/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/client/HttpClientProvider.java @@ -404,7 +404,17 @@ private void addHeaders(HttpRequestBase request, Map headers) { headers.forEach(request::addHeader); } + private boolean checkAlreadyHaveContentType(HttpEntityEnclosingRequestBase request) { + if (request.getEntity() != null && request.getEntity().getContentType() != null) { + return HTTP.CONTENT_TYPE.equals(request.getEntity().getContentType().getName()); + } + return false; + } + private void addBody(HttpEntityEnclosingRequestBase request, String body) { + if (checkAlreadyHaveContentType(request)) { + return; + } 
request.addHeader(HTTP.CONTENT_TYPE, APPLICATION_JSON); if (StringUtils.isBlank(body)) { diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/IcebergTableLoader.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/IcebergTableLoader.java index eb2826393780..554dd0bd7599 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/IcebergTableLoader.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/IcebergTableLoader.java @@ -31,7 +31,7 @@ import java.io.IOException; import java.io.Serializable; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; public class IcebergTableLoader implements Closeable, Serializable { diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/config/CommonConfig.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/config/CommonConfig.java index b82d21a706ee..2f893da092bf 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/config/CommonConfig.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/config/CommonConfig.java @@ -26,12 +26,11 @@ import lombok.ToString; import java.io.Serializable; -import java.util.List; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; import static org.apache.seatunnel.connectors.seatunnel.iceberg.config.IcebergCatalogType.HADOOP; import static org.apache.seatunnel.connectors.seatunnel.iceberg.config.IcebergCatalogType.HIVE; 
+import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; @Getter @ToString @@ -80,12 +79,6 @@ public class CommonConfig implements Serializable { .defaultValue(false) .withDescription(" the iceberg case_sensitive"); - public static final Option> KEY_FIELDS = - Options.key("fields") - .listType() - .noDefaultValue() - .withDescription(" the iceberg table fields"); - private String catalogName; private IcebergCatalogType catalogType; private String uri; diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/IcebergSource.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/IcebergSource.java index 7889376e4090..246aa7dd1476 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/IcebergSource.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/IcebergSource.java @@ -53,7 +53,7 @@ import java.util.ArrayList; import java.util.List; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; @AutoService(SeaTunnelSource.class) public class IcebergSource diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/enumerator/scan/IcebergScanSplitPlanner.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/enumerator/scan/IcebergScanSplitPlanner.java index b986702b76b8..4a084d73a538 100644 --- 
a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/enumerator/scan/IcebergScanSplitPlanner.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/enumerator/scan/IcebergScanSplitPlanner.java @@ -43,7 +43,7 @@ import java.util.List; import java.util.Optional; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; @Slf4j public class IcebergScanSplitPlanner { diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/InfluxDBConfig.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/InfluxDBConfig.java index 70690bf1d1e0..fa43f8fa6dff 100644 --- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/InfluxDBConfig.java +++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/InfluxDBConfig.java @@ -28,7 +28,6 @@ import java.io.Serializable; @Data -@SuppressWarnings("checkstyle:MagicNumber") public class InfluxDBConfig implements Serializable { public static final Option USERNAME = diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/SinkConfig.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/SinkConfig.java index b5988915fc97..806309bffeba 100644 --- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/SinkConfig.java +++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/SinkConfig.java @@ -31,7 
+31,6 @@ @Setter @Getter @ToString -@SuppressWarnings("checkstyle:MagicNumber") public class SinkConfig extends InfluxDBConfig { public SinkConfig(Config config) { super(config); @@ -61,12 +60,6 @@ public SinkConfig(Config config) { .defaultValue(1024) .withDescription("batch size of the influxdb client"); - public static final Option BATCH_INTERVAL_MS = - Options.key("batch_interval_ms") - .intType() - .noDefaultValue() - .withDescription("batch interval ms of the influxdb client"); - public static final Option MAX_RETRIES = Options.key("max_retries") .intType() @@ -105,7 +98,6 @@ public SinkConfig(Config config) { private String keyTime; private List keyTags; private int batchSize = BATCH_SIZE.defaultValue(); - private Integer batchIntervalMs; private int maxRetries; private int retryBackoffMultiplierMs; private int maxRetryBackoffMs; @@ -120,9 +112,6 @@ public static SinkConfig loadConfig(Config config) { if (config.hasPath(KEY_TAGS.key())) { sinkConfig.setKeyTags(config.getStringList(KEY_TAGS.key())); } - if (config.hasPath(BATCH_INTERVAL_MS.key())) { - sinkConfig.setBatchIntervalMs(config.getInt(BATCH_INTERVAL_MS.key())); - } if (config.hasPath(MAX_RETRIES.key())) { sinkConfig.setMaxRetries(config.getInt(MAX_RETRIES.key())); } diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java index f3673ddd9b73..3d44158e78b1 100644 --- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java +++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java @@ -28,7 +28,6 @@ import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.InfluxDBConfig.PASSWORD; import static 
org.apache.seatunnel.connectors.seatunnel.influxdb.config.InfluxDBConfig.URL; import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.InfluxDBConfig.USERNAME; -import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig.BATCH_INTERVAL_MS; import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig.BATCH_SIZE; import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig.KEY_MEASUREMENT; import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig.KEY_TAGS; @@ -54,7 +53,6 @@ public OptionRule optionRule() { KEY_TAGS, KEY_TIME, BATCH_SIZE, - BATCH_INTERVAL_MS, MAX_RETRIES, RETRY_BACKOFF_MULTIPLIER_MS) .build(); diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkWriter.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkWriter.java index 4683e4f460fc..a70c9f9e9a83 100644 --- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkWriter.java +++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkWriter.java @@ -34,7 +34,6 @@ import org.influxdb.dto.BatchPoints; import org.influxdb.dto.Point; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @@ -43,10 +42,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; @Slf4j public class InfluxDBSinkWriter extends AbstractSinkWriter { @@ -55,15 +50,11 @@ public class InfluxDBSinkWriter extends AbstractSinkWriter { private InfluxDB 
influxdb; private final SinkConfig sinkConfig; private final List batchList; - private ScheduledExecutorService scheduler; - private ScheduledFuture scheduledFuture; private volatile Exception flushException; - private final Integer batchIntervalMs; public InfluxDBSinkWriter(Config pluginConfig, SeaTunnelRowType seaTunnelRowType) throws ConnectException { this.sinkConfig = SinkConfig.loadConfig(pluginConfig); - this.batchIntervalMs = sinkConfig.getBatchIntervalMs(); this.serializer = new DefaultSerializer( seaTunnelRowType, @@ -73,26 +64,6 @@ public InfluxDBSinkWriter(Config pluginConfig, SeaTunnelRowType seaTunnelRowType sinkConfig.getMeasurement()); this.batchList = new ArrayList<>(); - if (batchIntervalMs != null) { - scheduler = - Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("influxDB-sink-output-%s") - .build()); - scheduledFuture = - scheduler.scheduleAtFixedRate( - () -> { - try { - flush(); - } catch (IOException e) { - flushException = e; - } - }, - batchIntervalMs, - batchIntervalMs, - TimeUnit.MILLISECONDS); - } - connect(); } @@ -112,11 +83,6 @@ public Optional prepareCommit() { @Override public void close() throws IOException { - if (scheduledFuture != null) { - scheduledFuture.cancel(false); - scheduler.shutdown(); - } - flush(); if (influxdb != null) { diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SinkConfig.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SinkConfig.java index bc9810e7f176..ce9b784b0608 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SinkConfig.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SinkConfig.java @@ -30,8 +30,8 @@ import java.time.ZoneId; import java.util.List; -import static 
com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; @Setter @Getter @@ -62,11 +62,6 @@ public class SinkConfig extends CommonConfig { .intType() .defaultValue(DEFAULT_BATCH_SIZE) .withDescription("batch size"); - public static final Option BATCH_INTERVAL_MS = - Options.key("batch_interval_ms") - .stringType() - .noDefaultValue() - .withDescription("batch interval ms"); public static final Option MAX_RETRIES = Options.key("max_retries").intType().noDefaultValue().withDescription("max retries"); public static final Option RETRY_BACKOFF_MULTIPLIER_MS = @@ -107,7 +102,6 @@ public class SinkConfig extends CommonConfig { private List keyMeasurementFields; private String storageGroup; private int batchSize = BATCH_SIZE.defaultValue(); - private Integer batchIntervalMs; private int maxRetries; private int retryBackoffMultiplierMs; private int maxRetryBackoffMs; @@ -144,10 +138,6 @@ public static SinkConfig loadConfig(Config pluginConfig) { int batchSize = checkIntArgument(pluginConfig.getInt(BATCH_SIZE.key())); sinkConfig.setBatchSize(batchSize); } - if (pluginConfig.hasPath(BATCH_INTERVAL_MS.key())) { - int batchIntervalMs = checkIntArgument(pluginConfig.getInt(BATCH_INTERVAL_MS.key())); - sinkConfig.setBatchIntervalMs(batchIntervalMs); - } if (pluginConfig.hasPath(MAX_RETRIES.key())) { int maxRetries = checkIntArgument(pluginConfig.getInt(MAX_RETRIES.key())); sinkConfig.setMaxRetries(maxRetries); diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SourceConfig.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SourceConfig.java index be96a7d91879..ac515ef37a91 100644 --- 
a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SourceConfig.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SourceConfig.java @@ -31,20 +31,6 @@ public class SourceConfig { public static final Option SQL = Options.key("sql").stringType().noDefaultValue().withDescription("sql"); - /*---------------------- single node configurations -------------------------*/ - - /** The host of the IotDB server. */ - public static final Option HOST = - Options.key("host").stringType().noDefaultValue().withDescription("host"); - - /* - * The port of the IotDB server. - */ - public static final Option PORT = - Options.key("port").intType().noDefaultValue().withDescription("port"); - - /*---------------------- multiple node configurations -------------------------*/ - /** Username for the source. */ public static final Option USERNAME = Options.key("username").stringType().noDefaultValue().withDescription("usernam"); @@ -53,7 +39,7 @@ public class SourceConfig { public static final Option PASSWORD = Options.key("password").stringType().noDefaultValue().withDescription("password"); - /** multiple nodes */ + /** node urls */ public static final Option NODE_URLS = Options.key("node_urls").stringType().noDefaultValue().withDescription("node urls"); diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkClient.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkClient.java index 87b66f62bd1b..e3cdac1ba4b4 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkClient.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkClient.java @@ -28,17 +28,12 @@ import 
org.apache.iotdb.session.Session; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; @Slf4j public class IoTDBSinkClient { @@ -47,8 +42,6 @@ public class IoTDBSinkClient { private final List batchList; private Session session; - private ScheduledExecutorService scheduler; - private ScheduledFuture scheduledFuture; private volatile boolean initialize; private volatile Exception flushException; @@ -95,26 +88,6 @@ private void tryInit() throws IOException { "Initialize IoTDB client failed.", e); } - - if (sinkConfig.getBatchIntervalMs() != null) { - scheduler = - Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("IoTDB-sink-output-%s") - .build()); - scheduledFuture = - scheduler.scheduleAtFixedRate( - () -> { - try { - flush(); - } catch (IOException e) { - flushException = e; - } - }, - sinkConfig.getBatchIntervalMs(), - sinkConfig.getBatchIntervalMs(), - TimeUnit.MILLISECONDS); - } initialize = true; } @@ -129,11 +102,6 @@ public synchronized void write(IoTDBRecord record) throws IOException { } public synchronized void close() throws IOException { - if (scheduledFuture != null) { - scheduledFuture.cancel(false); - scheduler.shutdown(); - } - flush(); try { diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkFactory.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkFactory.java index 7b46df3ae553..67a4527cf27b 100644 --- 
a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkFactory.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkFactory.java @@ -26,7 +26,6 @@ import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.CommonConfig.NODE_URLS; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.CommonConfig.PASSWORD; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.CommonConfig.USERNAME; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SinkConfig.BATCH_INTERVAL_MS; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SinkConfig.BATCH_SIZE; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SinkConfig.CONNECTION_TIMEOUT_IN_MS; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SinkConfig.DEFAULT_THRIFT_BUFFER_SIZE; @@ -57,7 +56,6 @@ public OptionRule optionRule() { KEY_MEASUREMENT_FIELDS, STORAGE_GROUP, BATCH_SIZE, - BATCH_INTERVAL_MS, MAX_RETRIES, RETRY_BACKOFF_MULTIPLIER_MS, MAX_RETRY_BACKOFF_MS, diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSource.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSource.java index 7f2960ae007d..0c171ada4fcf 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSource.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSource.java @@ -43,9 +43,7 @@ import java.util.HashMap; import java.util.Map; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.HOST; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.NODE_URLS; -import static 
org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.PORT; @AutoService(SeaTunnelSource.class) public class IoTDBSource @@ -66,11 +64,7 @@ public String getPluginName() { @Override public void prepare(Config pluginConfig) throws PrepareFailException { - CheckResult urlCheckResult = - CheckConfigUtil.checkAllExists(pluginConfig, HOST.key(), PORT.key()); - if (!urlCheckResult.isSuccess()) { - urlCheckResult = CheckConfigUtil.checkAllExists(pluginConfig, NODE_URLS.key()); - } + CheckResult urlCheckResult = CheckConfigUtil.checkAllExists(pluginConfig, NODE_URLS.key()); CheckResult schemaCheckResult = CheckConfigUtil.checkAllExists(pluginConfig, CatalogTableUtil.SCHEMA.key()); CheckResult mergedConfigCheck = diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceFactory.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceFactory.java index c697df701296..2c2a521fd84a 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceFactory.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceFactory.java @@ -30,10 +30,8 @@ import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.CommonConfig.USERNAME; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.ENABLE_CACHE_LEADER; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.FETCH_SIZE; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.HOST; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.LOWER_BOUND; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.NUM_PARTITIONS; -import static 
org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.PORT; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.SQL; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.THRIFT_DEFAULT_BUFFER_SIZE; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.THRIFT_MAX_FRAME_SIZE; @@ -52,8 +50,6 @@ public OptionRule optionRule() { return OptionRule.builder() .required(NODE_URLS, USERNAME, PASSWORD, SQL, SCHEMA) .optional( - HOST, - PORT, FETCH_SIZE, THRIFT_DEFAULT_BUFFER_SIZE, THRIFT_MAX_FRAME_SIZE, diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceReader.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceReader.java index c4ecd9dc81dc..546487825c35 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceReader.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceReader.java @@ -46,10 +46,8 @@ import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.ENABLE_CACHE_LEADER; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.FETCH_SIZE; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.HOST; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.NODE_URLS; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.PORT; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.THRIFT_DEFAULT_BUFFER_SIZE; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.THRIFT_MAX_FRAME_SIZE; import 
static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.USERNAME; @@ -130,17 +128,10 @@ private void read(IoTDBSourceSplit split, Collector output) throws private Session buildSession(Map conf) { Session.Builder sessionBuilder = new Session.Builder(); - if (conf.containsKey(HOST.key())) { - sessionBuilder - .host((String) conf.get(HOST.key())) - .port(Integer.parseInt(conf.get(PORT.key()).toString())) - .build(); - } else { - String nodeUrlsString = (String) conf.get(NODE_URLS.key()); - List nodes = - Stream.of(nodeUrlsString.split(NODES_SPLIT)).collect(Collectors.toList()); - sessionBuilder.nodeUrls(nodes); - } + String nodeUrlsString = (String) conf.get(NODE_URLS.key()); + List nodes = + Stream.of(nodeUrlsString.split(NODES_SPLIT)).collect(Collectors.toList()); + sessionBuilder.nodeUrls(nodes); if (null != conf.get(FETCH_SIZE.key())) { sessionBuilder.fetchSize(Integer.parseInt(conf.get(FETCH_SIZE.key()).toString())); } diff --git a/seatunnel-connectors-v2/connector-jdbc/pom.xml b/seatunnel-connectors-v2/connector-jdbc/pom.xml index e76237e7e07f..62d541d19f06 100644 --- a/seatunnel-connectors-v2/connector-jdbc/pom.xml +++ b/seatunnel-connectors-v2/connector-jdbc/pom.xml @@ -46,6 +46,7 @@ 3.13.29 12.0.3-0 2.5.1 + 8.6.0 @@ -143,6 +144,12 @@ ${vertica.version} provided + + cn.com.kingbase + kingbase8 + ${kingbase8.version} + provided + @@ -218,5 +225,11 @@ com.vertica.jdbc vertica-jdbc + + + cn.com.kingbase + kingbase8 + + diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java index ee108ad5e7fc..ddc327fbc300 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java @@ -20,9 +20,12 @@ import org.apache.seatunnel.api.table.catalog.Catalog; import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.ConstraintKey; import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.catalog.exception.CatalogException; import org.apache.seatunnel.api.table.catalog.exception.DatabaseAlreadyExistException; import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; @@ -40,22 +43,29 @@ import java.sql.Connection; import java.sql.DatabaseMetaData; import java.sql.DriverManager; +import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; public abstract class AbstractJdbcCatalog implements Catalog { private static final Logger LOG = LoggerFactory.getLogger(AbstractJdbcCatalog.class); + protected static final Set SYS_DATABASES = new HashSet<>(); + protected final String catalogName; protected final String 
defaultDatabase; protected final String username; @@ -66,7 +76,7 @@ public abstract class AbstractJdbcCatalog implements Catalog { protected final Optional defaultSchema; - protected Connection defaultConnection; + protected final Map connectionMap; public AbstractJdbcCatalog( String catalogName, @@ -88,6 +98,7 @@ public AbstractJdbcCatalog( this.defaultUrl = urlInfo.getOrigin(); this.suffix = urlInfo.getSuffix(); this.defaultSchema = Optional.ofNullable(defaultSchema); + this.connectionMap = new ConcurrentHashMap<>(); } @Override @@ -95,51 +106,101 @@ public String getDefaultDatabase() { return defaultDatabase; } - public String getCatalogName() { - return catalogName; + protected Connection getConnection(String url) { + if (connectionMap.containsKey(url)) { + return connectionMap.get(url); + } + try { + Connection connection = DriverManager.getConnection(url, username, pwd); + connectionMap.put(url, connection); + return connection; + } catch (SQLException e) { + throw new CatalogException(String.format("Failed connecting to %s via JDBC.", url), e); + } } - public String getUsername() { - return username; + @Override + public void open() throws CatalogException { + getConnection(defaultUrl); + LOG.info("Catalog {} established connection to {}", catalogName, defaultUrl); } - public String getPassword() { - return pwd; + @Override + public void close() throws CatalogException { + for (Map.Entry entry : connectionMap.entrySet()) { + try { + entry.getValue().close(); + } catch (SQLException e) { + throw new CatalogException( + String.format("Failed to close %s via JDBC.", entry.getKey()), e); + } + } + connectionMap.clear(); + LOG.info("Catalog {} closing", catalogName); } - public String getBaseUrl() { - return baseUrl; + protected String getSelectColumnsSql(TablePath tablePath) { + throw new UnsupportedOperationException(); } - @Override - public void open() throws CatalogException { - try { - defaultConnection = DriverManager.getConnection(defaultUrl, username, 
pwd); - } catch (SQLException e) { - throw new CatalogException( - String.format("Failed connecting to %s via JDBC.", defaultUrl), e); - } + protected Column buildColumn(ResultSet resultSet) throws SQLException { + throw new UnsupportedOperationException(); + } - LOG.info("Catalog {} established connection to {}", catalogName, defaultUrl); + protected TableIdentifier getTableIdentifier(TablePath tablePath) { + return TableIdentifier.of( + catalogName, + tablePath.getDatabaseName(), + tablePath.getSchemaName(), + tablePath.getTableName()); } - @Override - public void close() throws CatalogException { - if (defaultConnection == null) { - return; + public CatalogTable getTable(TablePath tablePath) + throws CatalogException, TableNotExistException { + if (!tableExists(tablePath)) { + throw new TableNotExistException(catalogName, tablePath); } + + String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); + Connection conn = getConnection(dbUrl); try { - defaultConnection.close(); - } catch (SQLException e) { + DatabaseMetaData metaData = conn.getMetaData(); + Optional primaryKey = getPrimaryKey(metaData, tablePath); + List constraintKeys = getConstraintKeys(metaData, tablePath); + try (PreparedStatement ps = conn.prepareStatement(getSelectColumnsSql(tablePath)); + ResultSet resultSet = ps.executeQuery()) { + + TableSchema.Builder builder = TableSchema.builder(); + while (resultSet.next()) { + builder.column(buildColumn(resultSet)); + } + // add primary key + primaryKey.ifPresent(builder::primaryKey); + // add constraint key + constraintKeys.forEach(builder::constraintKey); + TableIdentifier tableIdentifier = getTableIdentifier(tablePath); + return CatalogTable.of( + tableIdentifier, + builder.build(), + buildConnectorOptions(tablePath), + Collections.emptyList(), + "", + catalogName); + } + + } catch (Exception e) { throw new CatalogException( - String.format("Failed to close %s via JDBC.", defaultUrl), e); + String.format("Failed getting table %s", 
tablePath.getFullName()), e); } - LOG.info("Catalog {} closing", catalogName); } - protected Optional getPrimaryKey( - DatabaseMetaData metaData, String database, String table) throws SQLException { - return getPrimaryKey(metaData, database, table, table); + protected Optional getPrimaryKey(DatabaseMetaData metaData, TablePath tablePath) + throws SQLException { + return getPrimaryKey( + metaData, + tablePath.getDatabaseName(), + tablePath.getSchemaName(), + tablePath.getTableName()); } protected Optional getPrimaryKey( @@ -174,9 +235,13 @@ protected Optional getPrimaryKey( return Optional.of(PrimaryKey.of(pkName, pkFields)); } - protected List getConstraintKeys( - DatabaseMetaData metaData, String database, String table) throws SQLException { - return getConstraintKeys(metaData, database, table, table); + protected List getConstraintKeys(DatabaseMetaData metaData, TablePath tablePath) + throws SQLException { + return getConstraintKeys( + metaData, + tablePath.getDatabaseName(), + tablePath.getSchemaName(), + tablePath.getTableName()); } protected List getConstraintKeys( @@ -217,16 +282,24 @@ protected List getConstraintKeys( return new ArrayList<>(constraintKeyMap.values()); } - protected Optional getColumnDefaultValue( - DatabaseMetaData metaData, String database, String schema, String table, String column) - throws SQLException { - try (ResultSet resultSet = metaData.getColumns(database, schema, table, column)) { - while (resultSet.next()) { - String defaultValue = resultSet.getString("COLUMN_DEF"); - return Optional.ofNullable(defaultValue); - } + protected String getListDatabaseSql() { + throw new UnsupportedOperationException(); + } + + @Override + public List listDatabases() throws CatalogException { + try { + return queryString( + defaultUrl, + getListDatabaseSql(), + rs -> { + String s = rs.getString(1); + return SYS_DATABASES.contains(s) ? 
null : s; + }); + } catch (Exception e) { + throw new CatalogException( + String.format("Failed listing database in catalog %s", this.catalogName), e); } - return Optional.empty(); } @Override @@ -236,11 +309,44 @@ public boolean databaseExists(String databaseName) throws CatalogException { return listDatabases().contains(databaseName); } + protected String getListTableSql(String databaseName) { + throw new UnsupportedOperationException(); + } + + protected String getTableName(ResultSet rs) throws SQLException { + String schemaName = rs.getString(1); + String tableName = rs.getString(2); + if (StringUtils.isNotBlank(schemaName) && !SYS_DATABASES.contains(schemaName)) { + return schemaName + "." + tableName; + } + return null; + } + + protected String getTableName(TablePath tablePath) { + return tablePath.getSchemaAndTableName(); + } + + @Override + public List listTables(String databaseName) + throws CatalogException, DatabaseNotExistException { + if (!databaseExists(databaseName)) { + throw new DatabaseNotExistException(this.catalogName, databaseName); + } + + String dbUrl = getUrlFromDatabaseName(databaseName); + try { + return queryString(dbUrl, getListTableSql(databaseName), this::getTableName); + } catch (Exception e) { + throw new CatalogException( + String.format("Failed listing database in catalog %s", catalogName), e); + } + } + @Override public boolean tableExists(TablePath tablePath) throws CatalogException { try { return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()).contains(tablePath.getTableName()); + && listTables(tablePath.getDatabaseName()).contains(getTableName(tablePath)); } catch (DatabaseNotExistException e) { return false; } @@ -261,24 +367,61 @@ public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreI defaultSchema.get(), tablePath.getTableName()); } - if (!createTableInternal(tablePath, table) && !ignoreIfExists) { + + if (tableExists(tablePath)) { + if (ignoreIfExists) { + 
return; + } throw new TableAlreadyExistException(catalogName, tablePath); } + + createTableInternal(tablePath, table); + } + + protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { + throw new UnsupportedOperationException(); } - protected abstract boolean createTableInternal(TablePath tablePath, CatalogTable table) - throws CatalogException; + protected void createTableInternal(TablePath tablePath, CatalogTable table) + throws CatalogException { + String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); + try { + executeInternal(dbUrl, getCreateTableSql(tablePath, table)); + } catch (Exception e) { + throw new CatalogException( + String.format("Failed creating table %s", tablePath.getFullName()), e); + } + } @Override public void dropTable(TablePath tablePath, boolean ignoreIfNotExists) throws TableNotExistException, CatalogException { checkNotNull(tablePath, "Table path cannot be null"); - if (!dropTableInternal(tablePath) && !ignoreIfNotExists) { + + if (!tableExists(tablePath)) { + if (ignoreIfNotExists) { + return; + } throw new TableNotExistException(catalogName, tablePath); } + + dropTableInternal(tablePath); } - protected abstract boolean dropTableInternal(TablePath tablePath) throws CatalogException; + protected String getDropTableSql(TablePath tablePath) { + throw new UnsupportedOperationException(); + } + + protected void dropTableInternal(TablePath tablePath) throws CatalogException { + String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); + try { + // Will there exist concurrent drop for one table? 
+ executeInternal(dbUrl, getDropTableSql(tablePath)); + } catch (SQLException e) { + throw new CatalogException( + String.format("Failed dropping table %s", tablePath.getFullName()), e); + } + } @Override public void createDatabase(TablePath tablePath, boolean ignoreIfExists) @@ -287,14 +430,42 @@ public void createDatabase(TablePath tablePath, boolean ignoreIfExists) checkNotNull(tablePath.getDatabaseName(), "Database name cannot be null"); if (databaseExists(tablePath.getDatabaseName())) { + if (ignoreIfExists) { + return; + } throw new DatabaseAlreadyExistException(catalogName, tablePath.getDatabaseName()); } - if (!createDatabaseInternal(tablePath.getDatabaseName()) && !ignoreIfExists) { - throw new DatabaseAlreadyExistException(catalogName, tablePath.getDatabaseName()); + + createDatabaseInternal(tablePath.getDatabaseName()); + } + + protected String getCreateDatabaseSql(String databaseName) { + throw new UnsupportedOperationException(); + } + + protected void createDatabaseInternal(String databaseName) { + try { + executeInternal(defaultUrl, getCreateDatabaseSql(databaseName)); + } catch (Exception e) { + throw new CatalogException( + String.format( + "Failed creating database %s in catalog %s", + databaseName, this.catalogName), + e); } } - protected abstract boolean createDatabaseInternal(String databaseName); + protected void closeDatabaseConnection(String databaseName) { + String dbUrl = getUrlFromDatabaseName(databaseName); + try { + Connection connection = connectionMap.remove(dbUrl); + if (connection != null) { + connection.close(); + } + } catch (SQLException e) { + throw new CatalogException(String.format("Failed to close %s via JDBC.", dbUrl), e); + } + } @Override public void dropDatabase(TablePath tablePath, boolean ignoreIfNotExists) @@ -302,10 +473,77 @@ public void dropDatabase(TablePath tablePath, boolean ignoreIfNotExists) checkNotNull(tablePath, "Table path cannot be null"); checkNotNull(tablePath.getDatabaseName(), "Database name cannot be 
null"); - if (!dropDatabaseInternal(tablePath.getDatabaseName()) && !ignoreIfNotExists) { + if (!databaseExists(tablePath.getDatabaseName())) { + if (ignoreIfNotExists) { + return; + } throw new DatabaseNotExistException(catalogName, tablePath.getDatabaseName()); } + + dropDatabaseInternal(tablePath.getDatabaseName()); + } + + protected String getDropDatabaseSql(String databaseName) { + throw new UnsupportedOperationException(); + } + + protected void dropDatabaseInternal(String databaseName) throws CatalogException { + try { + executeInternal(defaultUrl, getDropDatabaseSql(databaseName)); + } catch (Exception e) { + throw new CatalogException( + String.format( + "Failed dropping database %s in catalog %s", + databaseName, this.catalogName), + e); + } + } + + protected String getUrlFromDatabaseName(String databaseName) { + String url = baseUrl.endsWith("/") ? baseUrl : baseUrl + "/"; + return url + databaseName + suffix; + } + + protected String getOptionTableName(TablePath tablePath) { + return tablePath.getFullName(); + } + + @SuppressWarnings("MagicNumber") + protected Map buildConnectorOptions(TablePath tablePath) { + Map options = new HashMap<>(8); + options.put("connector", "jdbc"); + options.put("url", getUrlFromDatabaseName(tablePath.getDatabaseName())); + options.put("table-name", getOptionTableName(tablePath)); + options.put("username", username); + options.put("password", pwd); + return options; + } + + @FunctionalInterface + public interface ResultSetConsumer { + T apply(ResultSet rs) throws SQLException; + } + + protected List queryString(String url, String sql, ResultSetConsumer consumer) + throws SQLException { + try (PreparedStatement ps = getConnection(url).prepareStatement(sql)) { + List result = new ArrayList<>(); + ResultSet rs = ps.executeQuery(); + while (rs.next()) { + String value = consumer.apply(rs); + if (value != null) { + result.add(value); + } + } + return result; + } } - protected abstract boolean dropDatabaseInternal(String 
databaseName) throws CatalogException; + // If sql is DDL, the execute() method always returns false, so the return value + // should not be used to determine whether changes were made in database. + protected boolean executeInternal(String url, String sql) throws SQLException { + try (PreparedStatement ps = getConnection(url).prepareStatement(sql)) { + return ps.execute(); + } + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java index 267a68f0eefc..b558926e453a 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java @@ -19,47 +19,34 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql; import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.ConstraintKey; import org.apache.seatunnel.api.table.catalog.PhysicalColumn; import org.apache.seatunnel.api.table.catalog.PrimaryKey; import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.catalog.TablePath; -import org.apache.seatunnel.api.table.catalog.TableSchema; -import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; -import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; import 
com.mysql.cj.MysqlType; -import com.mysql.cj.jdbc.result.ResultSetImpl; -import com.mysql.cj.util.StringUtils; import lombok.extern.slf4j.Slf4j; -import java.sql.Connection; import java.sql.DatabaseMetaData; -import java.sql.DriverManager; -import java.sql.PreparedStatement; import java.sql.ResultSet; -import java.sql.ResultSetMetaData; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Optional; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; @Slf4j public class MySqlCatalog extends AbstractJdbcCatalog { - protected static final Set SYS_DATABASES = new HashSet<>(4); - private final String SELECT_COLUMNS = + private static final MysqlDataTypeConvertor DATA_TYPE_CONVERTOR = new MysqlDataTypeConvertor(); + + private static final String SELECT_COLUMNS_SQL_TEMPLATE = "SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME ='%s'"; static { @@ -69,137 +56,65 @@ public class MySqlCatalog extends AbstractJdbcCatalog { SYS_DATABASES.add("sys"); } - protected final Map connectionMap; - public MySqlCatalog( String catalogName, String username, String pwd, JdbcUrlUtil.UrlInfo urlInfo) { super(catalogName, username, pwd, urlInfo, null); - this.connectionMap = new ConcurrentHashMap<>(); } - public Connection getConnection(String url) { - if (connectionMap.containsKey(url)) { - return connectionMap.get(url); - } - try { - Connection connection = DriverManager.getConnection(url, username, pwd); - connectionMap.put(url, connection); - return connection; - } catch (SQLException e) { - throw new CatalogException(String.format("Failed connecting to %s via JDBC.", url), e); - } + @Override + protected String getListDatabaseSql() { + return "SHOW DATABASES;"; } @Override - public void close() throws CatalogException { - for (Map.Entry entry : 
connectionMap.entrySet()) { - try { - entry.getValue().close(); - } catch (SQLException e) { - throw new CatalogException( - String.format("Failed to close %s via JDBC.", entry.getKey()), e); - } - } - super.close(); + protected String getListTableSql(String databaseName) { + return "SHOW TABLES;"; } @Override - public List listDatabases() throws CatalogException { - try (PreparedStatement ps = defaultConnection.prepareStatement("SHOW DATABASES;")) { - - List databases = new ArrayList<>(); - ResultSet rs = ps.executeQuery(); - - while (rs.next()) { - String databaseName = rs.getString(1); - if (!SYS_DATABASES.contains(databaseName)) { - databases.add(rs.getString(1)); - } - } - - return databases; - } catch (Exception e) { - throw new CatalogException( - String.format("Failed listing database in catalog %s", this.catalogName), e); - } + protected String getTableName(ResultSet rs) throws SQLException { + return rs.getString(1); } @Override - public List listTables(String databaseName) - throws CatalogException, DatabaseNotExistException { - if (!databaseExists(databaseName)) { - throw new DatabaseNotExistException(this.catalogName, databaseName); - } - - String dbUrl = getUrlFromDatabaseName(databaseName); - Connection connection = getConnection(dbUrl); - try (PreparedStatement ps = connection.prepareStatement("SHOW TABLES;")) { - - ResultSet rs = ps.executeQuery(); - - List tables = new ArrayList<>(); - - while (rs.next()) { - tables.add(rs.getString(1)); - } - - return tables; - } catch (Exception e) { - throw new CatalogException( - String.format("Failed listing database in catalog %s", catalogName), e); - } + protected String getTableName(TablePath tablePath) { + return tablePath.getTableName(); } @Override - public CatalogTable getTable(TablePath tablePath) - throws CatalogException, TableNotExistException { - if (!tableExists(tablePath)) { - throw new TableNotExistException(catalogName, tablePath); - } - - String dbUrl = 
getUrlFromDatabaseName(tablePath.getDatabaseName()); - Connection conn = getConnection(dbUrl); - try { - DatabaseMetaData metaData = conn.getMetaData(); + protected String getSelectColumnsSql(TablePath tablePath) { + return String.format( + SELECT_COLUMNS_SQL_TEMPLATE, tablePath.getDatabaseName(), tablePath.getTableName()); + } - Optional primaryKey = - getPrimaryKey(metaData, tablePath.getDatabaseName(), tablePath.getTableName()); - List constraintKeys = - getConstraintKeys( - metaData, tablePath.getDatabaseName(), tablePath.getTableName()); - String sql = - String.format( - SELECT_COLUMNS, tablePath.getDatabaseName(), tablePath.getTableName()); - try (PreparedStatement ps = conn.prepareStatement(sql); - ResultSet resultSet = ps.executeQuery(); ) { + @Override + protected TableIdentifier getTableIdentifier(TablePath tablePath) { + return TableIdentifier.of( + catalogName, tablePath.getDatabaseName(), tablePath.getTableName()); + } - TableSchema.Builder builder = TableSchema.builder(); - while (resultSet.next()) { - buildTable(resultSet, builder); - } - // add primary key - primaryKey.ifPresent(builder::primaryKey); - // add constraint key - constraintKeys.forEach(builder::constraintKey); - TableIdentifier tableIdentifier = - TableIdentifier.of( - catalogName, tablePath.getDatabaseName(), tablePath.getTableName()); - return CatalogTable.of( - tableIdentifier, - builder.build(), - buildConnectorOptions(tablePath), - Collections.emptyList(), - "", - "mysql"); - } + @Override + protected Optional getPrimaryKey(DatabaseMetaData metaData, TablePath tablePath) + throws SQLException { + return getPrimaryKey( + metaData, + tablePath.getDatabaseName(), + tablePath.getTableName(), + tablePath.getTableName()); + } - } catch (Exception e) { - throw new CatalogException( - String.format("Failed getting table %s", tablePath.getFullName()), e); - } + @Override + protected List getConstraintKeys(DatabaseMetaData metaData, TablePath tablePath) + throws SQLException { + return 
getConstraintKeys( + metaData, + tablePath.getDatabaseName(), + tablePath.getTableName(), + tablePath.getTableName()); } - private void buildTable(ResultSet resultSet, TableSchema.Builder builder) throws SQLException { + @Override + protected Column buildColumn(ResultSet resultSet) throws SQLException { String columnName = resultSet.getString("COLUMN_NAME"); String sourceType = resultSet.getString("COLUMN_TYPE"); String typeName = resultSet.getString("DATA_TYPE").toUpperCase(); @@ -243,121 +158,39 @@ private void buildTable(ResultSet resultSet, TableSchema.Builder builder) throws break; } - PhysicalColumn physicalColumn = - PhysicalColumn.of( - columnName, - type, - 0, - isNullable, - defaultValue, - comment, - sourceType, - sourceType.contains("unsigned"), - sourceType.contains("zerofill"), - bitLen, - null, - columnLength); - builder.column(physicalColumn); + return PhysicalColumn.of( + columnName, + type, + 0, + isNullable, + defaultValue, + comment, + sourceType, + sourceType.contains("unsigned"), + sourceType.contains("zerofill"), + bitLen, + null, + columnLength); } - public static Map getColumnsDefaultValue(TablePath tablePath, Connection conn) { - StringBuilder queryBuf = new StringBuilder("SHOW FULL COLUMNS FROM "); - queryBuf.append(StringUtils.quoteIdentifier(tablePath.getTableName(), "`", false)); - queryBuf.append(" FROM "); - queryBuf.append(StringUtils.quoteIdentifier(tablePath.getDatabaseName(), "`", false)); - try (PreparedStatement ps2 = conn.prepareStatement(queryBuf.toString())) { - ResultSet rs = ps2.executeQuery(); - Map result = new HashMap<>(); - while (rs.next()) { - String field = rs.getString("Field"); - Object defaultValue = rs.getObject("Default"); - result.put(field, defaultValue); - } - return result; - } catch (Exception e) { - throw new CatalogException( - String.format( - "Failed getting table(%s) columns default value", - tablePath.getFullName()), - e); - } - } - - // todo: If the origin source is mysql, we can directly use create 
table like to create the @Override - protected boolean createTableInternal(TablePath tablePath, CatalogTable table) - throws CatalogException { - String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); - - String createTableSql = - MysqlCreateTableSqlBuilder.builder(tablePath, table).build(table.getCatalogName()); - Connection connection = getConnection(dbUrl); - log.info("create table sql: {}", createTableSql); - try (PreparedStatement ps = connection.prepareStatement(createTableSql)) { - return ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format("Failed creating table %s", tablePath.getFullName()), e); - } + protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { + return MysqlCreateTableSqlBuilder.builder(tablePath, table).build(table.getCatalogName()); } @Override - protected boolean dropTableInternal(TablePath tablePath) throws CatalogException { - String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); - Connection connection = getConnection(dbUrl); - try (PreparedStatement ps = - connection.prepareStatement( - String.format("DROP TABLE IF EXISTS %s;", tablePath.getFullName()))) { - // Will there exist concurrent drop for one table? 
- return ps.execute(); - } catch (SQLException e) { - throw new CatalogException( - String.format("Failed dropping table %s", tablePath.getFullName()), e); - } + protected String getDropTableSql(TablePath tablePath) { + return String.format("DROP TABLE %s;", tablePath.getFullName()); } @Override - protected boolean createDatabaseInternal(String databaseName) throws CatalogException { - try (PreparedStatement ps = - defaultConnection.prepareStatement( - String.format("CREATE DATABASE `%s`;", databaseName))) { - return ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format( - "Failed creating database %s in catalog %s", - databaseName, this.catalogName), - e); - } + protected String getCreateDatabaseSql(String databaseName) { + return String.format("CREATE DATABASE `%s`;", databaseName); } @Override - protected boolean dropDatabaseInternal(String databaseName) throws CatalogException { - try (PreparedStatement ps = - defaultConnection.prepareStatement( - String.format("DROP DATABASE `%s`;", databaseName))) { - return ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format( - "Failed dropping database %s in catalog %s", - databaseName, this.catalogName), - e); - } - } - - /** - * @see com.mysql.cj.MysqlType - * @see ResultSetImpl#getObjectStoredProc(int, int) - */ - @SuppressWarnings("unchecked") - private SeaTunnelDataType fromJdbcType(ResultSetMetaData metadata, int colIndex) - throws SQLException { - MysqlType mysqlType = MysqlType.getByName(metadata.getColumnTypeName(colIndex)); - Map dataTypeProperties = new HashMap<>(); - dataTypeProperties.put(MysqlDataTypeConvertor.PRECISION, metadata.getPrecision(colIndex)); - dataTypeProperties.put(MysqlDataTypeConvertor.SCALE, metadata.getScale(colIndex)); - return new MysqlDataTypeConvertor().toSeaTunnelType(mysqlType, dataTypeProperties); + protected String getDropDatabaseSql(String databaseName) { + return String.format("DROP DATABASE `%s`;", databaseName); } 
private SeaTunnelDataType fromJdbcType(String typeName, int precision, int scale) { @@ -365,22 +198,6 @@ private SeaTunnelDataType fromJdbcType(String typeName, int precision, int sc Map dataTypeProperties = new HashMap<>(); dataTypeProperties.put(MysqlDataTypeConvertor.PRECISION, precision); dataTypeProperties.put(MysqlDataTypeConvertor.SCALE, scale); - return new MysqlDataTypeConvertor().toSeaTunnelType(mysqlType, dataTypeProperties); - } - - @SuppressWarnings("MagicNumber") - private Map buildConnectorOptions(TablePath tablePath) { - Map options = new HashMap<>(8); - options.put("connector", "jdbc"); - options.put("url", baseUrl + tablePath.getDatabaseName()); - options.put("table-name", tablePath.getFullName()); - options.put("username", username); - options.put("password", pwd); - return options; - } - - private String getUrlFromDatabaseName(String databaseName) { - String url = baseUrl.endsWith("/") ? baseUrl : baseUrl + "/"; - return url + databaseName + suffix; + return DATA_TYPE_CONVERTOR.toSeaTunnelType(mysqlType, dataTypeProperties); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java index cec934bcb016..3430de04b5af 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.type.DecimalType; import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; import 
org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -35,8 +36,8 @@ import java.util.List; import java.util.stream.Collectors; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; public class MysqlCreateTableSqlBuilder { @@ -55,6 +56,8 @@ public class MysqlCreateTableSqlBuilder { private MysqlDataTypeConvertor mysqlDataTypeConvertor; + private String fieldIde; + private MysqlCreateTableSqlBuilder(String tableName) { checkNotNull(tableName, "tableName must not be null"); this.tableName = tableName; @@ -76,7 +79,8 @@ public static MysqlCreateTableSqlBuilder builder( .charset(null) .primaryKey(tableSchema.getPrimaryKey()) .constraintKeys(tableSchema.getConstraintKeys()) - .addColumn(tableSchema.getColumns()); + .addColumn(tableSchema.getColumns()) + .fieldIde(catalogTable.getOptions().get("fieldIde")); } public MysqlCreateTableSqlBuilder addColumn(List columns) { @@ -90,6 +94,11 @@ public MysqlCreateTableSqlBuilder primaryKey(PrimaryKey primaryKey) { return this; } + public MysqlCreateTableSqlBuilder fieldIde(String fieldIde) { + this.fieldIde = fieldIde; + return this; + } + public MysqlCreateTableSqlBuilder constraintKeys(List constraintKeys) { this.constraintKeys = constraintKeys; return this; @@ -119,8 +128,9 @@ public String build(String catalogName) { List sqls = new ArrayList<>(); sqls.add( String.format( - "CREATE TABLE IF NOT EXISTS %s (\n%s\n)", - tableName, buildColumnsIdentifySql(catalogName))); + "CREATE TABLE %s (\n%s\n)", + CatalogUtils.quoteIdentifier(tableName, fieldIde, "`"), + buildColumnsIdentifySql(catalogName))); if (engine != null) { sqls.add("ENGINE = " + engine); } @@ -157,7 +167,7 @@ private String buildColumnsIdentifySql(String 
catalogName) { private String buildColumnIdentifySql(Column column, String catalogName) { final List columnSqls = new ArrayList<>(); - columnSqls.add(column.getName()); + columnSqls.add(CatalogUtils.quoteIdentifier(column.getName(), fieldIde, "`")); if (StringUtils.equals(catalogName, "mysql")) { columnSqls.add(column.getSourceType()); } else { @@ -243,7 +253,7 @@ private String buildPrimaryKeySql() { .map(columnName -> "`" + columnName + "`") .collect(Collectors.joining(", ")); // add sort type - return String.format("PRIMARY KEY (%s)", key); + return String.format("PRIMARY KEY (%s)", CatalogUtils.quoteIdentifier(key, fieldIde)); } private String buildConstraintKeySql(ConstraintKey constraintKey) { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java index 261f4f7fb6ff..b90a86a7abb8 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java @@ -18,33 +18,22 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle; import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.PhysicalColumn; -import org.apache.seatunnel.api.table.catalog.PrimaryKey; -import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.catalog.TablePath; -import org.apache.seatunnel.api.table.catalog.TableSchema; -import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import 
org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; -import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; import lombok.extern.slf4j.Slf4j; -import java.sql.DatabaseMetaData; -import java.sql.PreparedStatement; import java.sql.ResultSet; -import java.sql.ResultSetMetaData; import java.sql.SQLException; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import static org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle.OracleDataTypeConvertor.ORACLE_BFILE; import static org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle.OracleDataTypeConvertor.ORACLE_BLOB; @@ -61,8 +50,10 @@ @Slf4j public class OracleCatalog extends AbstractJdbcCatalog { + private static final OracleDataTypeConvertor DATA_TYPE_CONVERTOR = new OracleDataTypeConvertor(); + private static final List EXCLUDED_SCHEMAS = Collections.unmodifiableList( Arrays.asList( @@ -87,7 +78,7 @@ public class OracleCatalog extends AbstractJdbcCatalog { "EXFSYS", "SYSMAN")); - private static final String SELECT_COLUMNS_SQL = + private static final String SELECT_COLUMNS_SQL_TEMPLATE = "SELECT\n" + " cols.COLUMN_NAME,\n" + " CASE \n" @@ -127,158 +118,50 @@ public OracleCatalog( } @Override - public List listDatabases() throws CatalogException { - try (PreparedStatement ps = - defaultConnection.prepareStatement("SELECT name FROM v$database")) { - - List databases = new ArrayList<>(); - ResultSet rs = ps.executeQuery(); - - while (rs.next()) { - String databaseName = rs.getString(1); - databases.add(databaseName); - } - return databases; - } catch (Exception e) { - throw new CatalogException( - String.format("Failed listing 
database in catalog %s", this.catalogName), e); - } + protected String getListDatabaseSql() { + return "SELECT name FROM v$database"; } @Override - protected boolean createTableInternal(TablePath tablePath, CatalogTable table) - throws CatalogException { - String createTableSql = new OracleCreateTableSqlBuilder(table).build(tablePath); - String[] createTableSqls = createTableSql.split(";"); - for (String sql : createTableSqls) { - log.info("create table sql: {}", sql); - try (PreparedStatement ps = defaultConnection.prepareStatement(sql)) { - ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format("Failed creating table %s", tablePath.getFullName()), e); - } - } - return true; + protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { + return new OracleCreateTableSqlBuilder(table).build(tablePath); } @Override - protected boolean dropTableInternal(TablePath tablePath) throws CatalogException { - return false; + protected String getDropTableSql(TablePath tablePath) { + return String.format("DROP TABLE %s", getTableName(tablePath)); } @Override - protected boolean createDatabaseInternal(String databaseName) { - return false; + protected String getTableName(TablePath tablePath) { + return tablePath.getSchemaAndTableName().toUpperCase(); } @Override - protected boolean dropDatabaseInternal(String databaseName) throws CatalogException { - return false; + protected String getListTableSql(String databaseName) { + return "SELECT OWNER, TABLE_NAME FROM ALL_TABLES" + + " WHERE TABLE_NAME NOT LIKE 'MDRT_%'" + + " AND TABLE_NAME NOT LIKE 'MDRS_%'" + + " AND TABLE_NAME NOT LIKE 'MDXT_%'" + + " AND (TABLE_NAME NOT LIKE 'SYS_IOT_OVER_%' AND IOT_NAME IS NULL)"; } @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()) - .contains(tablePath.getSchemaAndTableName().toUpperCase()); - } catch 
(DatabaseNotExistException e) { - return false; + protected String getTableName(ResultSet rs) throws SQLException { + if (EXCLUDED_SCHEMAS.contains(rs.getString(1))) { + return null; } + return rs.getString(1) + "." + rs.getString(2); } @Override - public List listTables(String databaseName) - throws CatalogException, DatabaseNotExistException { - if (!databaseExists(databaseName)) { - throw new DatabaseNotExistException(this.catalogName, databaseName); - } - - try (PreparedStatement ps = - defaultConnection.prepareStatement( - "SELECT OWNER, TABLE_NAME FROM ALL_TABLES\n" - + "WHERE TABLE_NAME NOT LIKE 'MDRT_%'\n" - + " AND TABLE_NAME NOT LIKE 'MDRS_%'\n" - + " AND TABLE_NAME NOT LIKE 'MDXT_%'\n" - + " AND (TABLE_NAME NOT LIKE 'SYS_IOT_OVER_%' AND IOT_NAME IS NULL)")) { - - ResultSet rs = ps.executeQuery(); - List tables = new ArrayList<>(); - while (rs.next()) { - if (EXCLUDED_SCHEMAS.contains(rs.getString(1))) { - continue; - } - tables.add(rs.getString(1) + "." + rs.getString(2)); - } - - return tables; - } catch (Exception e) { - throw new CatalogException( - String.format("Failed listing database in catalog %s", catalogName), e); - } + protected String getSelectColumnsSql(TablePath tablePath) { + return String.format( + SELECT_COLUMNS_SQL_TEMPLATE, tablePath.getSchemaName(), tablePath.getTableName()); } @Override - public CatalogTable getTable(TablePath tablePath) - throws CatalogException, TableNotExistException { - if (!tableExists(tablePath)) { - throw new TableNotExistException(catalogName, tablePath); - } - - try { - DatabaseMetaData metaData = defaultConnection.getMetaData(); - Optional primaryKey = - getPrimaryKey( - metaData, - tablePath.getDatabaseName(), - tablePath.getSchemaName(), - tablePath.getTableName()); - List constraintKeys = - getConstraintKeys( - metaData, - tablePath.getDatabaseName(), - tablePath.getSchemaName(), - tablePath.getTableName()); - - String sql = - String.format( - SELECT_COLUMNS_SQL, - tablePath.getSchemaName(), - 
tablePath.getTableName()); - try (PreparedStatement ps = defaultConnection.prepareStatement(sql); - ResultSet resultSet = ps.executeQuery()) { - TableSchema.Builder builder = TableSchema.builder(); - // add column - while (resultSet.next()) { - buildColumn(resultSet, builder); - } - - // add primary key - primaryKey.ifPresent(builder::primaryKey); - // add constraint key - constraintKeys.forEach(builder::constraintKey); - TableIdentifier tableIdentifier = - TableIdentifier.of( - catalogName, - tablePath.getDatabaseName(), - tablePath.getSchemaName(), - tablePath.getTableName()); - return CatalogTable.of( - tableIdentifier, - builder.build(), - buildConnectorOptions(tablePath), - Collections.emptyList(), - ""); - } - - } catch (Exception e) { - throw new CatalogException( - String.format("Failed getting table %s", tablePath.getFullName()), e); - } - } - - private void buildColumn(ResultSet resultSet, TableSchema.Builder builder) throws SQLException { + protected Column buildColumn(ResultSet resultSet) throws SQLException { String columnName = resultSet.getString("COLUMN_NAME"); String typeName = resultSet.getString("TYPE_NAME"); String fullTypeName = resultSet.getString("FULL_TYPE_NAME"); @@ -314,31 +197,19 @@ private void buildColumn(ResultSet resultSet, TableSchema.Builder builder) throw break; } - PhysicalColumn physicalColumn = - PhysicalColumn.of( - columnName, - type, - 0, - isNullable, - defaultValue, - columnComment, - fullTypeName, - false, - false, - bitLen, - null, - columnLength); - builder.column(physicalColumn); - } - - @SuppressWarnings("unchecked") - private SeaTunnelDataType fromJdbcType(ResultSetMetaData metadata, int colIndex) - throws SQLException { - String columnType = metadata.getColumnTypeName(colIndex); - Map dataTypeProperties = new HashMap<>(); - dataTypeProperties.put(OracleDataTypeConvertor.PRECISION, metadata.getPrecision(colIndex)); - dataTypeProperties.put(OracleDataTypeConvertor.SCALE, metadata.getScale(colIndex)); - return 
DATA_TYPE_CONVERTOR.toSeaTunnelType(columnType, dataTypeProperties); + return PhysicalColumn.of( + columnName, + type, + 0, + isNullable, + defaultValue, + columnComment, + fullTypeName, + false, + false, + bitLen, + null, + columnLength); } private SeaTunnelDataType fromJdbcType(String typeName, long precision, long scale) { @@ -348,14 +219,13 @@ private SeaTunnelDataType fromJdbcType(String typeName, long precision, long return DATA_TYPE_CONVERTOR.toSeaTunnelType(typeName, dataTypeProperties); } - @SuppressWarnings("MagicNumber") - private Map buildConnectorOptions(TablePath tablePath) { - Map options = new HashMap<>(8); - options.put("connector", "jdbc"); - options.put("url", baseUrl); - options.put("table-name", tablePath.getSchemaAndTableName()); - options.put("username", username); - options.put("password", pwd); - return options; + @Override + protected String getUrlFromDatabaseName(String databaseName) { + return defaultUrl; + } + + @Override + protected String getOptionTableName(TablePath tablePath) { + return tablePath.getSchemaAndTableName(); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java index 984dd93e6a67..4b780131d54b 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java @@ -23,6 +23,7 @@ import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.type.DecimalType; import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; 
import org.apache.commons.lang3.StringUtils; @@ -36,23 +37,27 @@ public class OracleCreateTableSqlBuilder { private PrimaryKey primaryKey; private OracleDataTypeConvertor oracleDataTypeConvertor; private String sourceCatalogName; + private String fieldIde; public OracleCreateTableSqlBuilder(CatalogTable catalogTable) { this.columns = catalogTable.getTableSchema().getColumns(); this.primaryKey = catalogTable.getTableSchema().getPrimaryKey(); this.oracleDataTypeConvertor = new OracleDataTypeConvertor(); this.sourceCatalogName = catalogTable.getCatalogName(); + this.fieldIde = catalogTable.getOptions().get("fieldIde"); } public String build(TablePath tablePath) { StringBuilder createTableSql = new StringBuilder(); createTableSql .append("CREATE TABLE ") - .append(tablePath.getSchemaAndTableName()) + .append(tablePath.getSchemaAndTableName("\"")) .append(" (\n"); List columnSqls = - columns.stream().map(this::buildColumnSql).collect(Collectors.toList()); + columns.stream() + .map(column -> CatalogUtils.getFieldIde(buildColumnSql(column), fieldIde)) + .collect(Collectors.toList()); // Add primary key directly in the create table statement if (primaryKey != null @@ -70,7 +75,7 @@ public String build(TablePath tablePath) { .map( column -> buildColumnCommentSql( - column, tablePath.getSchemaAndTableName())) + column, tablePath.getSchemaAndTableName("\""))) .collect(Collectors.toList()); if (!commentSqls.isEmpty()) { @@ -83,7 +88,7 @@ public String build(TablePath tablePath) { private String buildColumnSql(Column column) { StringBuilder columnSql = new StringBuilder(); - columnSql.append(column.getName()).append(" "); + columnSql.append("\"").append(column.getName()).append("\" "); String columnType = sourceCatalogName.equals("oracle") @@ -95,11 +100,6 @@ private String buildColumnSql(Column column) { columnSql.append(" NOT NULL"); } - // if (column.getDefaultValue() != null) { - // columnSql.append(" DEFAULT - // '").append(column.getDefaultValue().toString()).append("'"); 
- // } - return columnSql.toString(); } @@ -140,7 +140,10 @@ private String buildColumnType(Column column) { private String buildPrimaryKeySql(PrimaryKey primaryKey) { String randomSuffix = UUID.randomUUID().toString().replace("-", "").substring(0, 4); - String columnNamesString = String.join(", ", primaryKey.getColumnNames()); + String columnNamesString = + primaryKey.getColumnNames().stream() + .map(columnName -> "\"" + columnName + "\"") + .collect(Collectors.joining(", ")); // In Oracle database, the maximum length for an identifier is 30 characters. String primaryKeyStr = primaryKey.getPrimaryKey(); @@ -148,21 +151,26 @@ private String buildPrimaryKeySql(PrimaryKey primaryKey) { primaryKeyStr = primaryKeyStr.substring(0, 25); } - return "CONSTRAINT " - + primaryKeyStr - + "_" - + randomSuffix - + " PRIMARY KEY (" - + columnNamesString - + ")"; + return CatalogUtils.getFieldIde( + "CONSTRAINT " + + primaryKeyStr + + "_" + + randomSuffix + + " PRIMARY KEY (" + + columnNamesString + + ")", + fieldIde); } private String buildColumnCommentSql(Column column, String tableName) { StringBuilder columnCommentSql = new StringBuilder(); - columnCommentSql.append("COMMENT ON COLUMN ").append(tableName).append("."); columnCommentSql - .append(column.getName()) - .append(" IS '") + .append(CatalogUtils.quoteIdentifier("COMMENT ON COLUMN ", fieldIde)) + .append(tableName) + .append("."); + columnCommentSql + .append(CatalogUtils.quoteIdentifier(column.getName(), fieldIde, "\"")) + .append(CatalogUtils.quoteIdentifier(" IS '", fieldIde)) .append(column.getComment()) .append("'"); return columnCommentSql.toString(); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java index e3507666d08f..2769d09ebb70 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java @@ -18,39 +18,20 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.psql; import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.PhysicalColumn; -import org.apache.seatunnel.api.table.catalog.PrimaryKey; -import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.catalog.TablePath; -import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; -import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; -import com.mysql.cj.MysqlType; -import com.mysql.cj.jdbc.result.ResultSetImpl; import lombok.extern.slf4j.Slf4j; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.DriverManager; -import java.sql.PreparedStatement; import java.sql.ResultSet; -import java.sql.ResultSetMetaData; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; -import java.util.List; import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import static org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.psql.PostgresDataTypeConvertor.PG_BIT; import 
static org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.psql.PostgresDataTypeConvertor.PG_BYTEA; @@ -65,7 +46,10 @@ @Slf4j public class PostgresCatalog extends AbstractJdbcCatalog { - private static final String SELECT_COLUMNS_SQL = + private static final PostgresDataTypeConvertor DATA_TYPE_CONVERTOR = + new PostgresDataTypeConvertor(); + + private static final String SELECT_COLUMNS_SQL_TEMPLATE = "SELECT \n" + " a.attname AS column_name, \n" + "\t\tt.typname as type_name,\n" @@ -102,8 +86,6 @@ public class PostgresCatalog extends AbstractJdbcCatalog { + "ORDER BY \n" + " a.attnum;"; - protected static final Set SYS_DATABASES = new HashSet<>(9); - static { SYS_DATABASES.add("information_schema"); SYS_DATABASES.add("pg_catalog"); @@ -116,8 +98,6 @@ public class PostgresCatalog extends AbstractJdbcCatalog { SYS_DATABASES.add("template1"); } - protected final Map connectionMap; - public PostgresCatalog( String catalogName, String username, @@ -125,154 +105,26 @@ public PostgresCatalog( JdbcUrlUtil.UrlInfo urlInfo, String defaultSchema) { super(catalogName, username, pwd, urlInfo, defaultSchema); - this.connectionMap = new ConcurrentHashMap<>(); - } - - public Connection getConnection(String url) { - if (connectionMap.containsKey(url)) { - return connectionMap.get(url); - } - try { - Connection connection = DriverManager.getConnection(url, username, pwd); - connectionMap.put(url, connection); - return connection; - } catch (SQLException e) { - throw new CatalogException(String.format("Failed connecting to %s via JDBC.", url), e); - } } @Override - public void close() throws CatalogException { - for (Map.Entry entry : connectionMap.entrySet()) { - try { - entry.getValue().close(); - } catch (SQLException e) { - throw new CatalogException( - String.format("Failed to close %s via JDBC.", entry.getKey()), e); - } - } - super.close(); + protected String getListDatabaseSql() { + return "select datname from pg_database;"; } @Override - public List listDatabases() throws 
CatalogException { - try (PreparedStatement ps = - defaultConnection.prepareStatement("select datname from pg_database;")) { - - List databases = new ArrayList<>(); - ResultSet rs = ps.executeQuery(); - - while (rs.next()) { - String databaseName = rs.getString(1); - if (!SYS_DATABASES.contains(databaseName)) { - databases.add(rs.getString(1)); - } - } - - return databases; - } catch (Exception e) { - throw new CatalogException( - String.format("Failed listing database in catalog %s", this.catalogName), e); - } + protected String getListTableSql(String databaseName) { + return "SELECT table_schema, table_name FROM information_schema.tables;"; } @Override - public List listTables(String databaseName) - throws CatalogException, DatabaseNotExistException { - if (!databaseExists(databaseName)) { - throw new DatabaseNotExistException(this.catalogName, databaseName); - } - - String dbUrl = getUrlFromDatabaseName(databaseName); - Connection connection = getConnection(dbUrl); - try (PreparedStatement ps = - connection.prepareStatement( - "SELECT table_schema, table_name FROM information_schema.tables;")) { - - ResultSet rs = ps.executeQuery(); - - List tables = new ArrayList<>(); - - while (rs.next()) { - String schemaName = rs.getString("table_schema"); - String tableName = rs.getString("table_name"); - if (org.apache.commons.lang3.StringUtils.isNotBlank(schemaName) - && !SYS_DATABASES.contains(schemaName)) { - tables.add(schemaName + "." 
+ tableName); - } - } - - return tables; - } catch (Exception e) { - throw new CatalogException( - String.format("Failed listing database in catalog %s", catalogName), e); - } + protected String getSelectColumnsSql(TablePath tablePath) { + return String.format( + SELECT_COLUMNS_SQL_TEMPLATE, tablePath.getSchemaName(), tablePath.getTableName()); } @Override - public CatalogTable getTable(TablePath tablePath) - throws CatalogException, TableNotExistException { - if (!tableExists(tablePath)) { - throw new TableNotExistException(catalogName, tablePath); - } - - String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); - Connection conn = getConnection(dbUrl); - try { - DatabaseMetaData metaData = conn.getMetaData(); - Optional primaryKey = - getPrimaryKey( - metaData, - tablePath.getDatabaseName(), - tablePath.getSchemaName(), - tablePath.getTableName()); - List constraintKeys = - getConstraintKeys( - metaData, - tablePath.getDatabaseName(), - tablePath.getSchemaName(), - tablePath.getTableName()); - - String sql = - String.format( - SELECT_COLUMNS_SQL, - tablePath.getSchemaName(), - tablePath.getTableName()); - try (PreparedStatement ps = conn.prepareStatement(sql); - ResultSet resultSet = ps.executeQuery()) { - TableSchema.Builder builder = TableSchema.builder(); - - // add column - while (resultSet.next()) { - buildColumn(resultSet, builder); - } - - // add primary key - primaryKey.ifPresent(builder::primaryKey); - // add constraint key - constraintKeys.forEach(builder::constraintKey); - TableIdentifier tableIdentifier = - TableIdentifier.of( - catalogName, - tablePath.getDatabaseName(), - tablePath.getSchemaName(), - tablePath.getTableName()); - return CatalogTable.of( - tableIdentifier, - builder.build(), - buildConnectorOptions(tablePath), - Collections.emptyList(), - "", - "postgres"); - } - - } catch (Exception e) { - throw new CatalogException( - String.format("Failed getting table %s", tablePath.getFullName()), e); - } - } - - private void 
buildColumn(ResultSet resultSet, TableSchema.Builder builder) throws SQLException { + protected Column buildColumn(ResultSet resultSet) throws SQLException { String columnName = resultSet.getString("column_name"); String typeName = resultSet.getString("type_name"); String fullTypeName = resultSet.getString("full_type_name"); @@ -282,8 +134,9 @@ private void buildColumn(ResultSet resultSet, TableSchema.Builder builder) throw Object defaultValue = resultSet.getObject("default_value"); boolean isNullable = resultSet.getString("is_nullable").equals("YES"); - if (defaultValue != null && defaultValue.toString().contains("regclass")) + if (defaultValue != null && defaultValue.toString().contains("regclass")) { defaultValue = null; + } SeaTunnelDataType type = fromJdbcType(typeName, columnLength, columnScale); long bitLen = 0; @@ -311,131 +164,55 @@ private void buildColumn(ResultSet resultSet, TableSchema.Builder builder) throw break; } - PhysicalColumn physicalColumn = - PhysicalColumn.of( - columnName, - type, - 0, - isNullable, - defaultValue, - columnComment, - fullTypeName, - false, - false, - bitLen, - null, - columnLength); - builder.column(physicalColumn); + return PhysicalColumn.of( + columnName, + type, + 0, + isNullable, + defaultValue, + columnComment, + fullTypeName, + false, + false, + bitLen, + null, + columnLength); } @Override - protected boolean createTableInternal(TablePath tablePath, CatalogTable table) - throws CatalogException { - String createTableSql = new PostgresCreateTableSqlBuilder(table).build(tablePath); - String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); - Connection conn = getConnection(dbUrl); - log.info("create table sql: {}", createTableSql); - try (PreparedStatement ps = conn.prepareStatement(createTableSql)) { - ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format("Failed creating table %s", tablePath.getFullName()), e); - } - return true; + protected String getCreateTableSql(TablePath 
tablePath, CatalogTable table) { + return new PostgresCreateTableSqlBuilder(table).build(tablePath); } @Override - protected boolean dropTableInternal(TablePath tablePath) throws CatalogException { - String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); - - String schemaName = tablePath.getSchemaName(); - String tableName = tablePath.getTableName(); - - String sql = "DROP TABLE IF EXISTS \"" + schemaName + "\".\"" + tableName + "\""; - Connection connection = getConnection(dbUrl); - try (PreparedStatement ps = connection.prepareStatement(sql)) { - // Will there exist concurrent drop for one table? - return ps.execute(); - } catch (SQLException e) { - throw new CatalogException( - String.format("Failed dropping table %s", tablePath.getFullName()), e); - } + protected String getDropTableSql(TablePath tablePath) { + return "DROP TABLE \"" + + tablePath.getSchemaName() + + "\".\"" + + tablePath.getTableName() + + "\""; } @Override - protected boolean createDatabaseInternal(String databaseName) throws CatalogException { - String sql = "CREATE DATABASE \"" + databaseName + "\""; - try (PreparedStatement ps = defaultConnection.prepareStatement(sql)) { - return ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format( - "Failed creating database %s in catalog %s", - databaseName, this.catalogName), - e); - } + protected String getCreateDatabaseSql(String databaseName) { + return "CREATE DATABASE \"" + databaseName + "\""; } @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()) - .contains(tablePath.getSchemaAndTableName()); - } catch (DatabaseNotExistException e) { - return false; - } + protected String getDropDatabaseSql(String databaseName) { + return "DROP DATABASE \"" + databaseName + "\""; } @Override - protected boolean dropDatabaseInternal(String databaseName) throws CatalogException { - 
String sql = "DROP DATABASE IF EXISTS \"" + databaseName + "\""; - try (PreparedStatement ps = defaultConnection.prepareStatement(sql)) { - return ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format( - "Failed dropping database %s in catalog %s", - databaseName, this.catalogName), - e); - } - } - - /** - * @see MysqlType - * @see ResultSetImpl#getObjectStoredProc(int, int) - */ - @SuppressWarnings("unchecked") - private SeaTunnelDataType fromJdbcType(ResultSetMetaData metadata, int colIndex) - throws SQLException { - String columnTypeName = metadata.getColumnTypeName(colIndex); - Map dataTypeProperties = new HashMap<>(); - dataTypeProperties.put( - PostgresDataTypeConvertor.PRECISION, metadata.getPrecision(colIndex)); - dataTypeProperties.put(PostgresDataTypeConvertor.SCALE, metadata.getScale(colIndex)); - return new PostgresDataTypeConvertor().toSeaTunnelType(columnTypeName, dataTypeProperties); + protected void dropDatabaseInternal(String databaseName) throws CatalogException { + closeDatabaseConnection(databaseName); + super.dropDatabaseInternal(databaseName); } private SeaTunnelDataType fromJdbcType(String typeName, long precision, long scale) { Map dataTypeProperties = new HashMap<>(); dataTypeProperties.put(PostgresDataTypeConvertor.PRECISION, precision); dataTypeProperties.put(PostgresDataTypeConvertor.SCALE, scale); - return new PostgresDataTypeConvertor().toSeaTunnelType(typeName, dataTypeProperties); - } - - @SuppressWarnings("MagicNumber") - private Map buildConnectorOptions(TablePath tablePath) { - Map options = new HashMap<>(8); - options.put("connector", "jdbc"); - options.put("url", baseUrl + tablePath.getDatabaseName()); - options.put("table-name", tablePath.getFullName()); - options.put("username", username); - options.put("password", pwd); - return options; - } - - private String getUrlFromDatabaseName(String databaseName) { - String url = baseUrl.endsWith("/") ? 
baseUrl : baseUrl + "/"; - return url + databaseName + suffix; + return DATA_TYPE_CONVERTOR.toSeaTunnelType(typeName, dataTypeProperties); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java index 85f4468bef9e..74b684c0e399 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java @@ -23,6 +23,7 @@ import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.type.DecimalType; import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; import org.apache.commons.lang3.StringUtils; @@ -37,23 +38,30 @@ public class PostgresCreateTableSqlBuilder { private PrimaryKey primaryKey; private PostgresDataTypeConvertor postgresDataTypeConvertor; private String sourceCatalogName; + private String fieldIde; public PostgresCreateTableSqlBuilder(CatalogTable catalogTable) { this.columns = catalogTable.getTableSchema().getColumns(); this.primaryKey = catalogTable.getTableSchema().getPrimaryKey(); this.postgresDataTypeConvertor = new PostgresDataTypeConvertor(); this.sourceCatalogName = catalogTable.getCatalogName(); + this.fieldIde = catalogTable.getOptions().get("fieldIde"); } public String build(TablePath tablePath) { StringBuilder createTableSql = new StringBuilder(); createTableSql - .append("CREATE TABLE IF NOT EXISTS ") - .append(tablePath.getSchemaAndTableName()) + .append(CatalogUtils.quoteIdentifier("CREATE TABLE ", fieldIde)) + 
.append(tablePath.getSchemaAndTableName("\"")) .append(" (\n"); List columnSqls = - columns.stream().map(this::buildColumnSql).collect(Collectors.toList()); + columns.stream() + .map( + column -> + CatalogUtils.quoteIdentifier( + buildColumnSql(column), fieldIde)) + .collect(Collectors.toList()); createTableSql.append(String.join(",\n", columnSqls)); createTableSql.append("\n);"); @@ -64,7 +72,7 @@ public String build(TablePath tablePath) { .map( columns -> buildColumnCommentSql( - columns, tablePath.getSchemaAndTableName())) + columns, tablePath.getSchemaAndTableName("\""))) .collect(Collectors.toList()); if (!commentSqls.isEmpty()) { @@ -77,7 +85,7 @@ public String build(TablePath tablePath) { private String buildColumnSql(Column column) { StringBuilder columnSql = new StringBuilder(); - columnSql.append(column.getName()).append(" "); + columnSql.append("\"").append(column.getName()).append("\" "); // For simplicity, assume the column type in SeaTunnelDataType is the same as in PostgreSQL String columnType = @@ -96,12 +104,6 @@ private String buildColumnSql(Column column) { columnSql.append(" PRIMARY KEY"); } - // Add default value if exists - // if (column.getDefaultValue() != null) { - // columnSql.append(" DEFAULT - // '").append(column.getDefaultValue().toString()).append("'"); - // } - return columnSql.toString(); } @@ -133,10 +135,13 @@ private String buildColumnType(Column column) { private String buildColumnCommentSql(Column column, String tableName) { StringBuilder columnCommentSql = new StringBuilder(); - columnCommentSql.append("COMMENT ON COLUMN ").append(tableName).append("."); columnCommentSql - .append(column.getName()) - .append(" IS '") + .append(CatalogUtils.quoteIdentifier("COMMENT ON COLUMN ", fieldIde)) + .append(tableName) + .append("."); + columnCommentSql + .append(CatalogUtils.quoteIdentifier(column.getName(), fieldIde, "\"")) + .append(CatalogUtils.quoteIdentifier(" IS '", fieldIde)) .append(column.getComment()) .append("'"); return 
columnCommentSql.toString(); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresDataTypeConvertor.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresDataTypeConvertor.java index c87a2fc11884..d1f8a5691dff 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresDataTypeConvertor.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresDataTypeConvertor.java @@ -105,6 +105,8 @@ public class PostgresDataTypeConvertor implements DataTypeConvertor { public static final String PG_INTERVAL = "interval"; public static final String PG_GEOMETRY = "geometry"; public static final String PG_GEOGRAPHY = "geography"; + public static final String PG_JSON = "json"; + public static final String PG_JSONB = "jsonb"; @Override public SeaTunnelDataType toSeaTunnelType(String connectorDataType) { @@ -160,6 +162,8 @@ public SeaTunnelDataType toSeaTunnelType( case PG_INTERVAL: case PG_GEOMETRY: case PG_GEOGRAPHY: + case PG_JSON: + case PG_JSONB: return BasicType.STRING_TYPE; case PG_CHAR_ARRAY: case PG_CHARACTER_ARRAY: diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java index ea04c60bff56..7d18ed2d9058 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java @@ -19,15 +19,10 @@ package 
org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sqlserver; import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.PhysicalColumn; -import org.apache.seatunnel.api.table.catalog.PrimaryKey; -import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.catalog.TablePath; -import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; -import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; @@ -37,33 +32,35 @@ import lombok.extern.slf4j.Slf4j; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.DriverManager; -import java.sql.PreparedStatement; import java.sql.ResultSet; -import java.sql.ResultSetMetaData; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; -import java.util.List; import java.util.Map; -import java.util.Optional; -import java.util.Set; @Slf4j public class SqlServerCatalog extends AbstractJdbcCatalog { - private static final Set SYS_DATABASES = new HashSet<>(4); - - static { - SYS_DATABASES.add("master"); - SYS_DATABASES.add("tempdb"); - SYS_DATABASES.add("model"); - SYS_DATABASES.add("msdb"); - } + private static final SqlServerDataTypeConvertor DATA_TYPE_CONVERTOR = + new SqlServerDataTypeConvertor(); + + private static final String SELECT_COLUMNS_SQL_TEMPLATE = + "SELECT tbl.name AS table_name,\n" + + " col.name AS column_name,\n" + + " ext.value AS 
comment,\n" + + " col.column_id AS column_id,\n" + + " types.name AS type,\n" + + " col.max_length AS max_length,\n" + + " col.precision AS precision,\n" + + " col.scale AS scale,\n" + + " col.is_nullable AS is_nullable,\n" + + " def.definition AS default_value\n" + + "FROM sys.tables tbl\n" + + " INNER JOIN sys.columns col ON tbl.object_id = col.object_id\n" + + " LEFT JOIN sys.types types ON col.user_type_id = types.user_type_id\n" + + " LEFT JOIN sys.extended_properties ext ON ext.major_id = col.object_id AND ext.minor_id = col.column_id\n" + + " LEFT JOIN sys.default_constraints def ON col.default_object_id = def.object_id AND ext.minor_id = col.column_id AND ext.name = 'MS_Description'\n" + + "WHERE schema_name(tbl.schema_id) = '%s' %s\n" + + "ORDER BY tbl.name, col.column_id"; public SqlServerCatalog( String catalogName, @@ -75,133 +72,29 @@ public SqlServerCatalog( } @Override - public List listDatabases() throws CatalogException { - try (Connection conn = DriverManager.getConnection(defaultUrl, username, pwd); - PreparedStatement ps = conn.prepareStatement("SELECT NAME FROM sys.databases")) { - - List databases = new ArrayList<>(); - ResultSet rs = ps.executeQuery(); - - while (rs.next()) { - String databaseName = rs.getString(1); - if (!SYS_DATABASES.contains(databaseName)) { - databases.add(databaseName); - } - } - - return databases; - } catch (Exception e) { - throw new CatalogException( - String.format("Failed listing database in catalog %s", this.catalogName), e); - } - } - - @Override - public List listTables(String databaseName) - throws CatalogException, DatabaseNotExistException { - if (!databaseExists(databaseName)) { - throw new DatabaseNotExistException(this.catalogName, databaseName); - } - - String dbUrl = getUrlFromDatabaseName(databaseName); - try (Connection conn = DriverManager.getConnection(dbUrl, username, pwd); - PreparedStatement ps = - conn.prepareStatement( - "SELECT TABLE_SCHEMA, TABLE_NAME FROM " - + databaseName - + 
".INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'")) { - - ResultSet rs = ps.executeQuery(); - - List tables = new ArrayList<>(); - - while (rs.next()) { - tables.add(rs.getString(1) + "." + rs.getString(2)); - } - - return tables; - } catch (Exception e) { - throw new CatalogException( - String.format("Failed listing database in catalog %s", catalogName), e); - } + protected String getListDatabaseSql() { + return "SELECT NAME FROM sys.databases"; } @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()) - .contains(tablePath.getSchemaAndTableName()); - } catch (DatabaseNotExistException e) { - return false; - } + protected String getListTableSql(String databaseName) { + return "SELECT TABLE_SCHEMA, TABLE_NAME FROM " + + databaseName + + ".INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'"; } @Override - public CatalogTable getTable(TablePath tablePath) - throws CatalogException, TableNotExistException { - if (!tableExists(tablePath)) { - throw new TableNotExistException(catalogName, tablePath); - } + protected String getSelectColumnsSql(TablePath tablePath) { String tableSql = StringUtils.isNotEmpty(tablePath.getTableName()) ? 
"AND tbl.name = '" + tablePath.getTableName() + "'" : ""; - String columnSql = - String.format( - " SELECT tbl.name AS table_name, \n col.name AS column_name, \n ext.value AS comment, \n col.column_id AS column_id, \n types.name AS type, \n col.max_length AS max_length, \n col.precision AS precision, \n col.scale AS scale, \n col.is_nullable AS is_nullable, \n def.definition AS default_value\n FROM sys.tables tbl \nINNER JOIN sys.columns col \n ON tbl.object_id = col.object_id \n LEFT JOIN sys.types types \n ON col.user_type_id = types.user_type_id \n LEFT JOIN sys.extended_properties ext \n ON ext.major_id = col.object_id and ext.minor_id = col.column_id \n LEFT JOIN sys.default_constraints def ON col.default_object_id = def.object_id \n AND ext.minor_id = col.column_id \n AND ext.name = 'MS_Description' \n WHERE schema_name(tbl.schema_id) = '%s' \n %s \n ORDER BY tbl.name, col.column_id", - tablePath.getSchemaName(), tableSql); - - String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); - try (Connection conn = DriverManager.getConnection(dbUrl, username, pwd)) { - DatabaseMetaData metaData = conn.getMetaData(); - Optional primaryKey = - getPrimaryKey( - metaData, - tablePath.getDatabaseName(), - tablePath.getSchemaName(), - tablePath.getTableName()); - List constraintKeys = - getConstraintKeys( - metaData, - tablePath.getDatabaseName(), - tablePath.getSchemaName(), - tablePath.getTableName()); - - try (PreparedStatement ps = conn.prepareStatement(columnSql); - ResultSet resultSet = ps.executeQuery(); ) { - TableSchema.Builder builder = TableSchema.builder(); - while (resultSet.next()) { - buildTable(resultSet, builder); - } - - // add primary key - primaryKey.ifPresent(builder::primaryKey); - // add constraint key - constraintKeys.forEach(builder::constraintKey); - TableIdentifier tableIdentifier = - TableIdentifier.of( - catalogName, - tablePath.getDatabaseName(), - tablePath.getSchemaName(), - tablePath.getTableName()); - return CatalogTable.of( - 
tableIdentifier, - builder.build(), - buildConnectorOptions(tablePath), - Collections.emptyList(), - "", - "sqlserver"); - } - - } catch (Exception e) { - throw new CatalogException( - String.format("Failed getting table %s", tablePath.getFullName()), e); - } + return String.format(SELECT_COLUMNS_SQL_TEMPLATE, tablePath.getSchemaName(), tableSql); } - private void buildTable(ResultSet resultSet, TableSchema.Builder builder) throws SQLException { + @Override + protected Column buildColumn(ResultSet resultSet) throws SQLException { String columnName = resultSet.getString("column_name"); String sourceType = resultSet.getString("type"); // String typeName = resultSet.getString("DATA_TYPE").toUpperCase(); @@ -266,21 +159,19 @@ private void buildTable(ResultSet resultSet, TableSchema.Builder builder) throws default: break; } - PhysicalColumn physicalColumn = - PhysicalColumn.of( - columnName, - type, - 0, - isNullable, - defaultValue, - comment, - sourceType, - false, - false, - bitLen, - null, - columnLength); - builder.column(physicalColumn); + return PhysicalColumn.of( + columnName, + type, + 0, + isNullable, + defaultValue, + comment, + sourceType, + false, + false, + bitLen, + null, + columnLength); } private SeaTunnelDataType fromJdbcType(String typeName, int precision, int scale) { @@ -288,103 +179,37 @@ private SeaTunnelDataType fromJdbcType(String typeName, int precision, int sc Map dataTypeProperties = new HashMap<>(); dataTypeProperties.put(SqlServerDataTypeConvertor.PRECISION, precision); dataTypeProperties.put(SqlServerDataTypeConvertor.SCALE, scale); - return new SqlServerDataTypeConvertor().toSeaTunnelType(pair.getLeft(), dataTypeProperties); + return DATA_TYPE_CONVERTOR.toSeaTunnelType(pair.getLeft(), dataTypeProperties); } @Override - protected boolean createTableInternal(TablePath tablePath, CatalogTable table) - throws CatalogException { - - String createTableSql = - SqlServerCreateTableSqlBuilder.builder(tablePath, table).build(tablePath, table); - 
log.info("create table sql: {}", createTableSql); - try (Connection conn = DriverManager.getConnection(defaultUrl, username, pwd); - PreparedStatement ps = conn.prepareStatement(createTableSql)) { - System.out.println(createTableSql); - return ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format("Failed creating table %s", tablePath.getFullName()), e); - } + protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { + return SqlServerCreateTableSqlBuilder.builder(tablePath, table).build(tablePath, table); } @Override - protected boolean dropTableInternal(TablePath tablePath) throws CatalogException { - String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); - try (Connection conn = DriverManager.getConnection(dbUrl, username, pwd); - PreparedStatement ps = - conn.prepareStatement( - String.format( - "DROP TABLE IF EXISTS %s", tablePath.getFullName()))) { - // Will there exist concurrent drop for one table? - return ps.execute(); - } catch (SQLException e) { - throw new CatalogException( - String.format("Failed dropping table %s", tablePath.getFullName()), e); - } + protected String getDropTableSql(TablePath tablePath) { + return String.format("DROP TABLE %s", tablePath.getFullName()); } @Override - protected boolean createDatabaseInternal(String databaseName) throws CatalogException { - try (Connection conn = DriverManager.getConnection(defaultUrl, username, pwd); - PreparedStatement ps = - conn.prepareStatement( - String.format("CREATE DATABASE `%s`", databaseName))) { - return ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format( - "Failed creating database %s in catalog %s", - databaseName, this.catalogName), - e); - } + protected String getCreateDatabaseSql(String databaseName) { + return String.format("CREATE DATABASE %s", databaseName); } @Override - protected boolean dropDatabaseInternal(String databaseName) throws CatalogException { - try (Connection conn = 
DriverManager.getConnection(defaultUrl, username, pwd); - PreparedStatement ps = - conn.prepareStatement( - String.format("DROP DATABASE IF EXISTS `%s`;", databaseName))) { - return ps.execute(); - } catch (Exception e) { - throw new CatalogException( - String.format( - "Failed dropping database %s in catalog %s", - databaseName, this.catalogName), - e); - } + protected String getDropDatabaseSql(String databaseName) { + return String.format("DROP DATABASE %s;", databaseName); } - @SuppressWarnings("unchecked") - private SeaTunnelDataType fromJdbcType(ResultSetMetaData metadata, int colIndex) - throws SQLException { - Pair> pair = - SqlServerType.parse(metadata.getColumnTypeName(colIndex)); - Map dataTypeProperties = new HashMap<>(); - dataTypeProperties.put( - SqlServerDataTypeConvertor.PRECISION, metadata.getPrecision(colIndex)); - dataTypeProperties.put(SqlServerDataTypeConvertor.SCALE, metadata.getScale(colIndex)); - return new SqlServerDataTypeConvertor().toSeaTunnelType(pair.getLeft(), dataTypeProperties); - } - - @SuppressWarnings("MagicNumber") - private Map buildConnectorOptions(TablePath tablePath) { - Map options = new HashMap<>(8); - options.put("connector", "jdbc"); - options.put("url", getUrlFromDatabaseName(tablePath.getDatabaseName())); - options.put("table-name", tablePath.getFullName()); - options.put("username", username); - options.put("password", pwd); - return options; + @Override + protected void dropDatabaseInternal(String databaseName) throws CatalogException { + closeDatabaseConnection(databaseName); + super.dropDatabaseInternal(databaseName); } - private String getUrlFromDatabaseName(String databaseName) { + @Override + protected String getUrlFromDatabaseName(String databaseName) { return baseUrl + ";databaseName=" + databaseName + ";" + suffix; } - - private String getCreateTableSql(TablePath tablePath, CatalogTable table) { - - return ""; - } } diff --git 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java index cf100075ad1d..86afa6e41e17 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.type.DecimalType; import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -35,8 +36,8 @@ import java.util.Map; import java.util.stream.Collectors; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; public class SqlServerCreateTableSqlBuilder { @@ -55,6 +56,8 @@ public class SqlServerCreateTableSqlBuilder { private SqlServerDataTypeConvertor sqlServerDataTypeConvertor; + private String fieldIde; + private SqlServerCreateTableSqlBuilder(String tableName) { checkNotNull(tableName, "tableName must not be null"); this.tableName = tableName; @@ -76,7 +79,8 @@ public static SqlServerCreateTableSqlBuilder builder( .charset(null) .primaryKey(tableSchema.getPrimaryKey()) .constraintKeys(tableSchema.getConstraintKeys()) - .addColumn(tableSchema.getColumns()); + 
.addColumn(tableSchema.getColumns()) + .fieldIde(catalogTable.getOptions().get("fieldIde")); } public SqlServerCreateTableSqlBuilder addColumn(List columns) { @@ -90,6 +94,11 @@ public SqlServerCreateTableSqlBuilder primaryKey(PrimaryKey primaryKey) { return this; } + public SqlServerCreateTableSqlBuilder fieldIde(String fieldIde) { + this.fieldIde = fieldIde; + return this; + } + public SqlServerCreateTableSqlBuilder constraintKeys(List constraintKeys) { this.constraintKeys = constraintKeys; return this; @@ -117,7 +126,7 @@ public SqlServerCreateTableSqlBuilder comment(String comment) { public String build(TablePath tablePath, CatalogTable catalogTable) { List sqls = new ArrayList<>(); - String sqlTableName = tablePath.getFullName(); + String sqlTableName = tablePath.getFullNameWithQuoted("[", "]"); Map columnComments = new HashMap<>(); sqls.add( String.format( @@ -137,6 +146,7 @@ public String build(TablePath tablePath, CatalogTable catalogTable) { sqls.add("COLLATE = " + collate); } String sqlTableSql = String.join(" ", sqls) + ";"; + sqlTableSql = CatalogUtils.quoteIdentifier(sqlTableSql, fieldIde); StringBuilder tableAndColumnComment = new StringBuilder(); if (comment != null) { sqls.add("COMMENT = '" + comment + "'"); @@ -185,7 +195,7 @@ private String buildColumnsIdentifySql(String catalogName, Map c private String buildColumnIdentifySql( Column column, String catalogName, Map columnComments) { final List columnSqls = new ArrayList<>(); - columnSqls.add(column.getName()); + columnSqls.add("[" + column.getName() + "]"); String tyNameDef = ""; if (StringUtils.equals(catalogName, "sqlserver")) { columnSqls.add(column.getSourceType()); @@ -244,19 +254,7 @@ private String buildColumnIdentifySql( } else { columnSqls.add("NOT NULL"); } - // default value - // if (column.getDefaultValue() != null) { - // String defaultValue = "'" + column.getDefaultValue().toString() + "'"; - // if (StringUtils.equals(SqlServerType.BINARY.getName(), tyNameDef) - // && 
defaultValue.contains("b'")) { - // String rep = defaultValue.replace("b", "").replace("'", ""); - // defaultValue = "0x" + Integer.toHexString(Integer.parseInt(rep)); - // } else if (StringUtils.equals(SqlServerType.BIT.getName(), tyNameDef) - // && defaultValue.contains("b'")) { - // defaultValue = defaultValue.replace("b", "").replace("'", ""); - // } - // columnSqls.add("DEFAULT " + defaultValue); - // } + // comment if (column.getComment() != null) { columnComments.put(column.getName(), column.getComment()); @@ -267,7 +265,10 @@ private String buildColumnIdentifySql( private String buildPrimaryKeySql() { // .map(columnName -> "`" + columnName + "`") - String key = String.join(", ", primaryKey.getColumnNames()); + String key = + primaryKey.getColumnNames().stream() + .map(columnName -> "[" + columnName + "]") + .collect(Collectors.joining(", ")); // add sort type return String.format("PRIMARY KEY (%s)", key); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtils.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtils.java new file mode 100644 index 000000000000..4b60f92d80a3 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtils.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils; + +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; + +import org.apache.commons.lang3.StringUtils; + +public class CatalogUtils { + public static String getFieldIde(String identifier, String fieldIde) { + if (StringUtils.isBlank(fieldIde)) { + return identifier; + } + switch (FieldIdeEnum.valueOf(fieldIde.toUpperCase())) { + case LOWERCASE: + return identifier.toLowerCase(); + case UPPERCASE: + return identifier.toUpperCase(); + default: + return identifier; + } + } + + public static String quoteIdentifier(String identifier, String fieldIde, String quote) { + if (identifier.contains(".")) { + String[] parts = identifier.split("\\."); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < parts.length - 1; i++) { + sb.append(quote).append(parts[i]).append(quote).append("."); + } + return sb.append(quote) + .append(getFieldIde(parts[parts.length - 1], fieldIde)) + .append(quote) + .toString(); + } + + return quote + getFieldIde(identifier, fieldIde) + quote; + } + + public static String quoteIdentifier(String identifier, String fieldIde) { + return getFieldIde(identifier, fieldIde); + } + + public static String quoteTableIdentifier(String identifier, String fieldIde) { + if (identifier.contains(".")) { + String[] parts = identifier.split("\\."); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < parts.length - 1; i++) { + sb.append(parts[i]).append("."); + } + return sb.append(getFieldIde(parts[parts.length 
- 1], fieldIde)).toString(); + } + + return getFieldIde(identifier, fieldIde); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java index 6e2147c03c86..555963af2cf7 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java @@ -38,7 +38,6 @@ public class JdbcConnectionConfig implements Serializable { public boolean autoCommit = JdbcOptions.AUTO_COMMIT.defaultValue(); public int batchSize = JdbcOptions.BATCH_SIZE.defaultValue(); - public int batchIntervalMs = JdbcOptions.BATCH_INTERVAL_MS.defaultValue(); public String xaDataSourceClassName; @@ -55,7 +54,6 @@ public static JdbcConnectionConfig of(ReadonlyConfig config) { builder.maxRetries(config.get(JdbcOptions.MAX_RETRIES)); builder.connectionCheckTimeoutSeconds(config.get(JdbcOptions.CONNECTION_CHECK_TIMEOUT_SEC)); builder.batchSize(config.get(JdbcOptions.BATCH_SIZE)); - builder.batchIntervalMs(config.get(JdbcOptions.BATCH_INTERVAL_MS)); if (config.get(JdbcOptions.IS_EXACTLY_ONCE)) { builder.xaDataSourceClassName(config.get(JdbcOptions.XA_DATA_SOURCE_CLASS_NAME)); builder.maxCommitAttempts(config.get(JdbcOptions.MAX_COMMIT_ATTEMPTS)); @@ -104,10 +102,6 @@ public int getBatchSize() { return batchSize; } - public int getBatchIntervalMs() { - return batchIntervalMs; - } - public String getXaDataSourceClassName() { return xaDataSourceClassName; } @@ -136,7 +130,6 @@ public static final class Builder { private String query; private boolean autoCommit = JdbcOptions.AUTO_COMMIT.defaultValue(); private int batchSize = JdbcOptions.BATCH_SIZE.defaultValue(); - private int 
batchIntervalMs = JdbcOptions.BATCH_INTERVAL_MS.defaultValue(); private String xaDataSourceClassName; private int maxCommitAttempts = JdbcOptions.MAX_COMMIT_ATTEMPTS.defaultValue(); private int transactionTimeoutSec = JdbcOptions.TRANSACTION_TIMEOUT_SEC.defaultValue(); @@ -193,11 +186,6 @@ public Builder batchSize(int batchSize) { return this; } - public Builder batchIntervalMs(int batchIntervalMs) { - this.batchIntervalMs = batchIntervalMs; - return this; - } - public Builder xaDataSourceClassName(String xaDataSourceClassName) { this.xaDataSourceClassName = xaDataSourceClassName; return this; @@ -216,7 +204,6 @@ public Builder transactionTimeoutSec(int transactionTimeoutSec) { public JdbcConnectionConfig build() { JdbcConnectionConfig jdbcConnectionConfig = new JdbcConnectionConfig(); jdbcConnectionConfig.batchSize = this.batchSize; - jdbcConnectionConfig.batchIntervalMs = this.batchIntervalMs; jdbcConnectionConfig.driverName = this.driverName; jdbcConnectionConfig.compatibleMode = this.compatibleMode; jdbcConnectionConfig.maxRetries = this.maxRetries; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java index f5d1613c53ef..b01fc872f31f 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java @@ -19,11 +19,11 @@ import org.apache.seatunnel.api.configuration.Option; import org.apache.seatunnel.api.configuration.Options; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; import java.math.BigDecimal; import java.util.List; -@SuppressWarnings("checkstyle:MagicNumber") public interface JdbcOptions { Option URL 
= Options.key("url").stringType().noDefaultValue().withDescription("url"); @@ -71,12 +71,6 @@ public interface JdbcOptions { "For queries that return a large number of objects, " + "you can configure the row fetch size used in the query to improve performance by reducing the number database hits required to satisfy the selection criteria. Zero means use jdbc default value."); - Option BATCH_INTERVAL_MS = - Options.key("batch_interval_ms") - .intType() - .defaultValue(0) - .withDescription("batch interval milliSecond"); - Option IS_EXACTLY_ONCE = Options.key("is_exactly_once") .booleanType() @@ -161,4 +155,10 @@ public interface JdbcOptions { .intType() .noDefaultValue() .withDescription("partition num"); + + Option FIELD_IDE = + Options.key("field_ide") + .enumType(FieldIdeEnum.class) + .noDefaultValue() + .withDescription("Whether case conversion is required"); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java index af24a9a6b03a..874eb807f333 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java @@ -51,11 +51,10 @@ public static JdbcSinkConfig of(ReadonlyConfig config) { config.getOptional(JdbcOptions.PRIMARY_KEYS).ifPresent(builder::primaryKeys); config.getOptional(JdbcOptions.DATABASE).ifPresent(builder::database); config.getOptional(JdbcOptions.TABLE).ifPresent(builder::table); - config.getOptional(ENABLE_UPSERT).ifPresent(builder::enableUpsert); - config.getOptional(IS_PRIMARY_KEY_UPDATED).ifPresent(builder::isPrimaryKeyUpdated); - config.getOptional(SUPPORT_UPSERT_BY_INSERT_ONLY) - 
.ifPresent(builder::supportUpsertByInsertOnly); - config.getOptional(JdbcOptions.QUERY).ifPresent(builder::simpleSql); + builder.enableUpsert(config.get(ENABLE_UPSERT)); + builder.isPrimaryKeyUpdated(config.get(IS_PRIMARY_KEY_UPDATED)); + builder.supportUpsertByInsertOnly(config.get(SUPPORT_UPSERT_BY_INSERT_ONLY)); + builder.simpleSql(config.get(JdbcOptions.QUERY)); return builder.build(); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/JdbcOutputFormat.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/JdbcOutputFormat.java index d47814f15314..a7d791252213 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/JdbcOutputFormat.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/JdbcOutputFormat.java @@ -34,11 +34,6 @@ import java.io.Serializable; import java.sql.Connection; import java.sql.SQLException; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import static com.google.common.base.Preconditions.checkNotNull; @@ -58,9 +53,6 @@ public class JdbcOutputFormat> implem private transient E jdbcStatementExecutor; private transient int batchCount = 0; private transient volatile boolean closed = false; - - private transient ScheduledExecutorService scheduler; - private transient ScheduledFuture scheduledFuture; private transient volatile Exception flushException; public JdbcOutputFormat( @@ -83,37 +75,6 @@ public void open() throws IOException { e); } jdbcStatementExecutor = createAndOpenStatementExecutor(statementExecutorFactory); - - if (jdbcConnectionConfig.getBatchIntervalMs() 
!= 0 - && jdbcConnectionConfig.getBatchSize() != 1) { - this.scheduler = - Executors.newScheduledThreadPool( - 1, - runnable -> { - AtomicInteger cnt = new AtomicInteger(0); - Thread thread = new Thread(runnable); - thread.setDaemon(true); - thread.setName( - "jdbc-upsert-output-format" + "-" + cnt.incrementAndGet()); - return thread; - }); - this.scheduledFuture = - this.scheduler.scheduleWithFixedDelay( - () -> { - synchronized (JdbcOutputFormat.this) { - if (!closed) { - try { - flush(); - } catch (Exception e) { - flushException = e; - } - } - } - }, - jdbcConnectionConfig.getBatchIntervalMs(), - jdbcConnectionConfig.getBatchIntervalMs(), - TimeUnit.MILLISECONDS); - } } private E createAndOpenStatementExecutor(StatementExecutorFactory statementExecutorFactory) { @@ -209,11 +170,6 @@ public synchronized void close() { if (!closed) { closed = true; - if (this.scheduledFuture != null) { - scheduledFuture.cancel(false); - this.scheduler.shutdown(); - } - if (batchCount > 0) { try { flush(); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/converter/AbstractJdbcRowConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/converter/AbstractJdbcRowConverter.java index 07aa7959946e..3d20bd9562e6 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/converter/AbstractJdbcRowConverter.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/converter/AbstractJdbcRowConverter.java @@ -43,7 +43,6 @@ public abstract class AbstractJdbcRowConverter implements JdbcRowConverter { public AbstractJdbcRowConverter() {} @Override - @SuppressWarnings("checkstyle:Indentation") public SeaTunnelRow toInternal(ResultSet rs, SeaTunnelRowType typeInfo) throws SQLException { Object[] fields = new 
Object[typeInfo.getTotalFields()]; for (int fieldIndex = 0; fieldIndex < typeInfo.getTotalFields(); fieldIndex++) { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java index 8a0b31a5eeb9..e0cf5252a60b 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java @@ -20,6 +20,9 @@ import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceConfig; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; + +import org.apache.commons.lang3.StringUtils; import java.io.Serializable; import java.sql.Connection; @@ -68,9 +71,13 @@ default String hashModForField(String fieldName, int mod) { default String quoteIdentifier(String identifier) { return identifier; } + /** Quotes the identifier for database name or field name */ + default String quoteDatabaseIdentifier(String identifier) { + return identifier; + } default String tableIdentifier(String database, String tableName) { - return quoteIdentifier(database) + "." + quoteIdentifier(tableName); + return quoteDatabaseIdentifier(database) + "." 
+ quoteIdentifier(tableName); } /** @@ -219,4 +226,18 @@ default ResultSetMetaData getResultSetMetaData( default String extractTableName(TablePath tablePath) { return tablePath.getSchemaAndTableName(); } + + default String getFieldIde(String identifier, String fieldIde) { + if (StringUtils.isEmpty(fieldIde)) { + return identifier; + } + switch (FieldIdeEnum.valueOf(fieldIde.toUpperCase())) { + case LOWERCASE: + return identifier.toLowerCase(); + case UPPERCASE: + return identifier.toUpperCase(); + default: + return identifier; + } + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectFactory.java index 3d66de659092..5439937f53d9 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectFactory.java @@ -44,7 +44,7 @@ public interface JdbcDialectFactory { * @param compatibleMode The compatible mode * @return a new instance of {@link JdbcDialect} */ - default JdbcDialect create(String compatibleMode) { + default JdbcDialect create(String compatibleMode, String fieldId) { return create(); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectLoader.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectLoader.java index b49df35ff3f8..350a22e20c6c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectLoader.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectLoader.java @@ -36,6 +36,10 @@ public final class JdbcDialectLoader { private JdbcDialectLoader() {} + public static JdbcDialect load(String url, String compatibleMode) { + return load(url, compatibleMode, ""); + } + /** * Loads the unique JDBC Dialect that can handle the given database url. * @@ -45,7 +49,7 @@ private JdbcDialectLoader() {} * unambiguously process the given database URL. * @return The loaded dialect. */ - public static JdbcDialect load(String url, String compatibleMode) { + public static JdbcDialect load(String url, String compatibleMode, String fieldIde) { ClassLoader cl = Thread.currentThread().getContextClassLoader(); List foundFactories = discoverFactories(cl); @@ -90,7 +94,7 @@ public static JdbcDialect load(String url, String compatibleMode) { .collect(Collectors.joining("\n")))); } - return matchingFactories.get(0).create(compatibleMode); + return matchingFactories.get(0).create(compatibleMode, fieldIde); } private static List discoverFactories(ClassLoader classLoader) { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/db2/DB2TypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/db2/DB2TypeMapper.java index 06d534ea2d32..7df921027690 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/db2/DB2TypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/db2/DB2TypeMapper.java @@ -83,7 +83,6 @@ public class DB2TypeMapper implements JdbcDialectTypeMapper { // other private static final String DB2_XML = "XML"; - @SuppressWarnings("checkstyle:MagicNumber") @Override public SeaTunnelDataType 
mapping(ResultSetMetaData metadata, int colIndex) throws SQLException { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dialectenum/FieldIdeEnum.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dialectenum/FieldIdeEnum.java new file mode 100644 index 000000000000..39f95210623b --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dialectenum/FieldIdeEnum.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum; + +public enum FieldIdeEnum { + ORIGINAL("original"), // Original string form + UPPERCASE("uppercase"), // Convert to uppercase + LOWERCASE("lowercase"); // Convert to lowercase + + private final String value; + + FieldIdeEnum(String value) { + this.value = value; + } + + public String getValue() { + return value; + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dm/DmdbDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dm/DmdbDialect.java index 00845cf11a30..67db4fb7c604 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dm/DmdbDialect.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dm/DmdbDialect.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dm; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; @@ -101,4 +102,9 @@ public Optional getUpsertStatement( return Optional.of(upsertSQL); } + + @Override + public String extractTableName(TablePath tablePath) { + return tablePath.getTableName(); + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dm/DmdbTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dm/DmdbTypeMapper.java index c9de46723f62..d6d630bc9107 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dm/DmdbTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/dm/DmdbTypeMapper.java @@ -102,7 +102,6 @@ public class DmdbTypeMapper implements JdbcDialectTypeMapper { public static final String DM_LONGVARBINARY = "LONGVARBINARY"; @Override - @SuppressWarnings("checkstyle:MagicNumber") public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) throws SQLException { String dmdbType = metadata.getColumnTypeName(colIndex).toUpperCase(); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/gbase8a/Gbase8aTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/gbase8a/Gbase8aTypeMapper.java index 9686c3cdddab..a4c598f97510 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/gbase8a/Gbase8aTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/gbase8a/Gbase8aTypeMapper.java @@ -61,7 +61,6 @@ public class Gbase8aTypeMapper implements JdbcDialectTypeMapper { private static final String GBASE8A_BLOB = "BLOB"; private static final String GBASE8A_TEXT = "TEXT"; - @SuppressWarnings("checkstyle:MagicNumber") @Override public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) throws SQLException { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseDialect.java new file mode 100644 index 
000000000000..2f6d56610635 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseDialect.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.kingbase;
+
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper;
+
+import java.util.Arrays;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * {@link JdbcDialect} for Kingbase: supplies the Kingbase row converter and type mapper and
+ * builds upsert statements with an {@code ON CONFLICT ... DO UPDATE} clause.
+ */
+public class KingbaseDialect implements JdbcDialect {
+
+    @Override
+    public String dialectName() {
+        return "Kingbase";
+    }
+
+    @Override
+    public JdbcRowConverter getRowConverter() {
+        return new KingbaseJdbcRowConverter();
+    }
+
+    @Override
+    public JdbcDialectTypeMapper getJdbcDialectTypeMapper() {
+        return new KingbaseTypeMapper();
+    }
+
+    /**
+     * Builds the upsert statement: the dialect's insert statement followed by an ON CONFLICT
+     * clause over the quoted unique-key columns that assigns every listed field from EXCLUDED.
+     *
+     * @param database passed through to the insert-statement builder
+     * @param tableName passed through to the insert-statement builder
+     * @param fieldNames columns to insert and to overwrite on conflict
+     * @param uniqueKeyFields columns forming the conflict target
+     * @return the upsert SQL, always present
+     */
+    @Override
+    public Optional getUpsertStatement(
+            String database, String tableName, String[] fieldNames, String[] uniqueKeyFields) {
+        // Conflict target: quoted unique-key columns, comma separated.
+        final String conflictTarget =
+                Arrays.stream(uniqueKeyFields)
+                        .map(keyField -> quoteIdentifier(keyField))
+                        .collect(Collectors.joining(", "));
+        // One col=EXCLUDED.col assignment per field, comma separated.
+        final String assignments =
+                Arrays.stream(fieldNames)
+                        .map(this::quoteIdentifier)
+                        .map(quoted -> quoted + "=EXCLUDED." + quoted)
+                        .collect(Collectors.joining(", "));
+        final String insertClause = getInsertIntoStatement(database, tableName, fieldNames);
+        return Optional.of(
+                String.format(
+                        "%s ON CONFLICT (%s) DO UPDATE SET %s",
+                        insertClause, conflictTarget, assignments));
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseDialectFactory.java new file mode 100644 index 000000000000..f99986103515 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseDialectFactory.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.kingbase;
+
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectFactory;
+
+import com.google.auto.service.AutoService;
+
+/**
+ * Factory for {@link KingbaseDialect}, registered as a {@link JdbcDialectFactory} service
+ * through {@link AutoService}.
+ */
+@AutoService(JdbcDialectFactory.class)
+public class KingbaseDialectFactory implements JdbcDialectFactory {
+
+    /**
+     * Tells whether this factory handles the given JDBC URL.
+     *
+     * @param url JDBC connection URL; dereferenced directly, so it must not be null
+     * @return {@code true} when the URL starts with {@code jdbc:kingbase8:}
+     */
+    @Override
+    public boolean acceptsURL(String url) {
+        return url.startsWith("jdbc:kingbase8:");
+    }
+
+    /** Creates a new {@link KingbaseDialect}. */
+    @Override
+    public JdbcDialect create() {
+        return new KingbaseDialect();
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseJdbcRowConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseJdbcRowConverter.java new file mode 100644 index 000000000000..9577e12f6200 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseJdbcRowConverter.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.kingbase;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.common.exception.CommonErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorException;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.AbstractJdbcRowConverter;
+
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.util.Optional;
+
+/**
+ * Row converter for the Kingbase dialect: reads JDBC result-set rows into {@link SeaTunnelRow}
+ * instances and binds {@link SeaTunnelRow} fields to {@link PreparedStatement} parameters.
+ *
+ * <p>ROW, MAP and ARRAY fields are rejected in both directions.
+ */
+public class KingbaseJdbcRowConverter extends AbstractJdbcRowConverter {
+
+    @Override
+    public String converterName() {
+        return "KingBase";
+    }
+
+    /**
+     * Reads the current row of {@code rs} into a {@link SeaTunnelRow}, one field per entry of
+     * {@code typeInfo}; field {@code i} is read from result-set column {@code i + 1}.
+     *
+     * <p>SQL NULL is preserved as {@code null} for every supported type, including the types
+     * read through primitive JDBC getters (which report NULL as {@code false} or {@code 0}).
+     *
+     * @param rs result set positioned on the row to read
+     * @param typeInfo row type describing the expected field types
+     * @return a new row holding the converted field values
+     * @throws SQLException if a column cannot be read
+     * @throws JdbcConnectorException if a field has an unsupported type
+     */
+    @Override
+    @SuppressWarnings("checkstyle:Indentation")
+    public SeaTunnelRow toInternal(ResultSet rs, SeaTunnelRowType typeInfo) throws SQLException {
+        Object[] fields = new Object[typeInfo.getTotalFields()];
+        for (int fieldIndex = 0; fieldIndex < typeInfo.getTotalFields(); fieldIndex++) {
+            SeaTunnelDataType seaTunnelDataType = typeInfo.getFieldType(fieldIndex);
+            int resultSetIndex = fieldIndex + 1;
+            switch (seaTunnelDataType.getSqlType()) {
+                case STRING:
+                    fields[fieldIndex] = rs.getString(resultSetIndex);
+                    break;
+                case BOOLEAN:
+                    fields[fieldIndex] = nullIfWasNull(rs, rs.getBoolean(resultSetIndex));
+                    break;
+                case TINYINT:
+                    fields[fieldIndex] = nullIfWasNull(rs, rs.getByte(resultSetIndex));
+                    break;
+                case SMALLINT:
+                    fields[fieldIndex] = nullIfWasNull(rs, rs.getShort(resultSetIndex));
+                    break;
+                case INT:
+                    fields[fieldIndex] = nullIfWasNull(rs, rs.getInt(resultSetIndex));
+                    break;
+                case BIGINT:
+                    fields[fieldIndex] = nullIfWasNull(rs, rs.getLong(resultSetIndex));
+                    break;
+                case FLOAT:
+                    fields[fieldIndex] = nullIfWasNull(rs, rs.getFloat(resultSetIndex));
+                    break;
+                case DOUBLE:
+                    fields[fieldIndex] = nullIfWasNull(rs, rs.getDouble(resultSetIndex));
+                    break;
+                case DECIMAL:
+                    fields[fieldIndex] = rs.getBigDecimal(resultSetIndex);
+                    break;
+                case DATE:
+                    Date sqlDate = rs.getDate(resultSetIndex);
+                    fields[fieldIndex] =
+                            Optional.ofNullable(sqlDate).map(Date::toLocalDate).orElse(null);
+                    break;
+                case TIME:
+                    Time sqlTime = rs.getTime(resultSetIndex);
+                    fields[fieldIndex] =
+                            Optional.ofNullable(sqlTime).map(Time::toLocalTime).orElse(null);
+                    break;
+                case TIMESTAMP:
+                    Timestamp sqlTimestamp = rs.getTimestamp(resultSetIndex);
+                    fields[fieldIndex] =
+                            Optional.ofNullable(sqlTimestamp)
+                                    .map(Timestamp::toLocalDateTime)
+                                    .orElse(null);
+                    break;
+                case BYTES:
+                    fields[fieldIndex] = rs.getBytes(resultSetIndex);
+                    break;
+                case NULL:
+                    fields[fieldIndex] = null;
+                    break;
+                case ROW:
+                case MAP:
+                case ARRAY:
+                default:
+                    throw new JdbcConnectorException(
+                            CommonErrorCode.UNSUPPORTED_DATA_TYPE,
+                            "Unexpected value: " + seaTunnelDataType);
+            }
+        }
+        return new SeaTunnelRow(fields);
+    }
+
+    /**
+     * Returns {@code null} when the column most recently read from {@code rs} was SQL NULL,
+     * otherwise the boxed value that was read. Must be called right after the getter, because
+     * {@link ResultSet#wasNull()} refers to the last column read.
+     */
+    private static Object nullIfWasNull(ResultSet rs, Object value) throws SQLException {
+        return rs.wasNull() ? null : value;
+    }
+
+    /**
+     * Binds every field of {@code row} to {@code statement}; field {@code i} is bound to
+     * parameter {@code i + 1}. Null field values are bound with {@code setObject(index, null)}.
+     *
+     * @param rowType row type describing the field types of {@code row}
+     * @param row row whose fields are bound
+     * @param statement statement to bind the parameters on
+     * @return the same {@code statement} instance
+     * @throws SQLException if a parameter cannot be set
+     * @throws JdbcConnectorException if a field has an unsupported type
+     */
+    @Override
+    public PreparedStatement toExternal(
+            SeaTunnelRowType rowType, SeaTunnelRow row, PreparedStatement statement)
+            throws SQLException {
+        for (int fieldIndex = 0; fieldIndex < rowType.getTotalFields(); fieldIndex++) {
+            SeaTunnelDataType seaTunnelDataType = rowType.getFieldType(fieldIndex);
+            int statementIndex = fieldIndex + 1;
+            Object fieldValue = row.getField(fieldIndex);
+            if (fieldValue == null) {
+                statement.setObject(statementIndex, null);
+                continue;
+            }
+
+            switch (seaTunnelDataType.getSqlType()) {
+                case STRING:
+                    statement.setString(statementIndex, (String) fieldValue);
+                    break;
+                case BOOLEAN:
+                    statement.setBoolean(statementIndex, (Boolean) fieldValue);
+                    break;
+                case TINYINT:
+                    statement.setByte(statementIndex, (Byte) fieldValue);
+                    break;
+                case SMALLINT:
+                    statement.setShort(statementIndex, (Short) fieldValue);
+                    break;
+                case INT:
+                    statement.setInt(statementIndex, (Integer) fieldValue);
+                    break;
+                case BIGINT:
+                    statement.setLong(statementIndex, (Long) fieldValue);
+                    break;
+                case FLOAT:
+                    statement.setFloat(statementIndex, (Float) fieldValue);
+                    break;
+                case DOUBLE:
+                    statement.setDouble(statementIndex, (Double) fieldValue);
+                    break;
+                case DECIMAL:
+                    statement.setBigDecimal(statementIndex, (BigDecimal) fieldValue);
+                    break;
+                case DATE:
+                    LocalDate localDate = (LocalDate) fieldValue;
+                    statement.setDate(statementIndex, java.sql.Date.valueOf(localDate));
+                    break;
+                case TIME:
+                    LocalTime localTime = (LocalTime) fieldValue;
+                    statement.setTime(statementIndex, java.sql.Time.valueOf(localTime));
+                    break;
+                case TIMESTAMP:
+                    LocalDateTime localDateTime = (LocalDateTime) fieldValue;
+                    statement.setTimestamp(
+                            statementIndex, java.sql.Timestamp.valueOf(localDateTime));
+                    break;
+                case BYTES:
+                    statement.setBytes(statementIndex, (byte[]) fieldValue);
+                    break;
+                case NULL:
+                    statement.setNull(statementIndex, java.sql.Types.NULL);
+                    break;
+                case ROW:
+                case MAP:
+                case ARRAY:
+                default:
+                    throw new JdbcConnectorException(
+                            CommonErrorCode.UNSUPPORTED_DATA_TYPE,
+                            "Unexpected value: " + seaTunnelDataType);
+            }
+        }
+        return statement;
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseTypeMapper.java new file mode 100644 index 000000000000..439c8fc4202a --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/kingbase/KingbaseTypeMapper.java @@ -0,0
+1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.kingbase;
+
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.DecimalType;
+import org.apache.seatunnel.api.table.type.LocalTimeType;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.common.exception.CommonErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorException;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper;
+
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.Locale;
+
+/**
+ * Maps KingbaseES column type names reported by JDBC metadata to SeaTunnel data types.
+ *
+ * <p>Array, JSON/JSONB, BYTEA and TIMESTAMPTZ columns are rejected as unsupported.
+ */
+public class KingbaseTypeMapper implements JdbcDialectTypeMapper {
+
+    private static final String KB_SMALLSERIAL = "SMALLSERIAL";
+    private static final String KB_SERIAL = "SERIAL";
+    private static final String KB_BIGSERIAL = "BIGSERIAL";
+    private static final String KB_BYTEA = "BYTEA";
+    private static final String KB_BYTEA_ARRAY = "_BYTEA";
+    private static final String KB_SMALLINT = "INT2";
+    private static final String KB_SMALLINT_ARRAY = "_INT2";
+    private static final String KB_INTEGER = "INT4";
+    private static final String KB_INTEGER_ARRAY = "_INT4";
+    private static final String KB_BIGINT = "INT8";
+    private static final String KB_BIGINT_ARRAY = "_INT8";
+    private static final String KB_REAL = "FLOAT4";
+    private static final String KB_REAL_ARRAY = "_FLOAT4";
+    private static final String KB_DOUBLE_PRECISION = "FLOAT8";
+    private static final String KB_DOUBLE_PRECISION_ARRAY = "_FLOAT8";
+    private static final String KB_NUMERIC = "NUMERIC";
+    private static final String KB_NUMERIC_ARRAY = "_NUMERIC";
+    private static final String KB_BOOLEAN = "BOOL";
+    private static final String KB_BOOLEAN_ARRAY = "_BOOL";
+    private static final String KB_TIMESTAMP = "TIMESTAMP";
+    private static final String KB_TIMESTAMP_ARRAY = "_TIMESTAMP";
+    private static final String KB_TIMESTAMPTZ = "TIMESTAMPTZ";
+    private static final String KB_TIMESTAMPTZ_ARRAY = "_TIMESTAMPTZ";
+    private static final String KB_DATE = "DATE";
+    private static final String KB_DATE_ARRAY = "_DATE";
+    private static final String KB_TIME = "TIME";
+    private static final String KB_TIME_ARRAY = "_TIME";
+    private static final String KB_TEXT = "TEXT";
+    private static final String KB_TEXT_ARRAY = "_TEXT";
+    private static final String KB_CHAR = "BPCHAR";
+    private static final String KB_CHAR_ARRAY = "_BPCHAR";
+    private static final String KB_CHARACTER = "CHARACTER";
+
+    private static final String KB_CHARACTER_VARYING = "VARCHAR";
+    private static final String KB_CHARACTER_VARYING_ARRAY = "_VARCHAR";
+    private static final String KB_JSON = "JSON";
+    private static final String KB_JSONB = "JSONB";
+
+    /**
+     * Resolves the SeaTunnel data type for one result-set column.
+     *
+     * @param metadata result-set metadata supplying the column type name, precision and scale
+     * @param colIndex index of the column, passed through to {@code metadata}
+     * @return the matching SeaTunnel data type
+     * @throws SQLException if the metadata cannot be read
+     * @throws JdbcConnectorException if the column type is not supported
+     */
+    @SuppressWarnings("checkstyle:MagicNumber")
+    @Override
+    public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex)
+            throws SQLException {
+
+        // Locale.ROOT keeps the match locale-independent: under e.g. a Turkish default locale,
+        // "int4".toUpperCase() yields a dotted capital I and would miss the INT4 label below.
+        String kbType = metadata.getColumnTypeName(colIndex).toUpperCase(Locale.ROOT);
+
+        int precision = metadata.getPrecision(colIndex);
+
+        switch (kbType) {
+            case KB_BOOLEAN:
+                return BasicType.BOOLEAN_TYPE;
+            case KB_SMALLINT:
+                return BasicType.SHORT_TYPE;
+            case KB_SMALLSERIAL:
+            case KB_INTEGER:
+            case KB_SERIAL:
+                return BasicType.INT_TYPE;
+            case KB_BIGINT:
+            case KB_BIGSERIAL:
+                return BasicType.LONG_TYPE;
+            case KB_REAL:
+                return BasicType.FLOAT_TYPE;
+            case KB_DOUBLE_PRECISION:
+                return BasicType.DOUBLE_TYPE;
+            case KB_NUMERIC:
+                // see SPARK-26538: handle numeric without explicit precision and scale.
+                if (precision > 0) {
+                    return new DecimalType(precision, metadata.getScale(colIndex));
+                }
+                return new DecimalType(38, 18);
+            case KB_CHAR:
+            case KB_CHARACTER:
+            case KB_CHARACTER_VARYING:
+            case KB_TEXT:
+                return BasicType.STRING_TYPE;
+            case KB_TIMESTAMP:
+                return LocalTimeType.LOCAL_DATE_TIME_TYPE;
+            case KB_TIME:
+                return LocalTimeType.LOCAL_TIME_TYPE;
+            case KB_DATE:
+                return LocalTimeType.LOCAL_DATE_TYPE;
+            case KB_CHAR_ARRAY:
+            case KB_CHARACTER_VARYING_ARRAY:
+            case KB_TEXT_ARRAY:
+            case KB_DOUBLE_PRECISION_ARRAY:
+            case KB_REAL_ARRAY:
+            case KB_BIGINT_ARRAY:
+            case KB_SMALLINT_ARRAY:
+            case KB_INTEGER_ARRAY:
+            case KB_BYTEA_ARRAY:
+            case KB_BOOLEAN_ARRAY:
+            case KB_TIMESTAMP_ARRAY:
+            case KB_NUMERIC_ARRAY:
+            case KB_TIMESTAMPTZ:
+            case KB_TIMESTAMPTZ_ARRAY:
+            case KB_TIME_ARRAY:
+            case KB_DATE_ARRAY:
+            case KB_JSONB:
+            case KB_JSON:
+            case KB_BYTEA:
+            default:
+                throw new JdbcConnectorException(
+                        CommonErrorCode.UNSUPPORTED_OPERATION,
+                        String.format("Doesn't support KingBaseES type '%s' yet", kbType));
+        }
+    }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MySqlDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MySqlDialectFactory.java index 10047311b933..a4f89a4dc857 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MySqlDialectFactory.java +++
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MySqlDialectFactory.java @@ -22,6 +22,8 @@ import com.google.auto.service.AutoService; +import javax.annotation.Nonnull; + /** Factory for {@link MysqlDialect}. */ @AutoService(JdbcDialectFactory.class) public class MySqlDialectFactory implements JdbcDialectFactory { @@ -34,4 +36,9 @@ public boolean acceptsURL(String url) { public JdbcDialect create() { return new MysqlDialect(); } + + @Override + public JdbcDialect create(@Nonnull String compatibleMode, String fieldIde) { + return new MysqlDialect(fieldIde); + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MySqlTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MySqlTypeMapper.java index e2fed97da2a7..d1b70d83bb20 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MySqlTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MySqlTypeMapper.java @@ -87,7 +87,6 @@ public class MySqlTypeMapper implements JdbcDialectTypeMapper { private static final String MYSQL_VARBINARY = "VARBINARY"; private static final String MYSQL_GEOMETRY = "GEOMETRY"; - @SuppressWarnings("checkstyle:MagicNumber") @Override public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) throws SQLException { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java index c71dc3f76a15..1ae69a6131f5 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java @@ -21,6 +21,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; import java.sql.Connection; import java.sql.PreparedStatement; @@ -31,6 +32,14 @@ import java.util.stream.Collectors; public class MysqlDialect implements JdbcDialect { + public String fieldIde = FieldIdeEnum.ORIGINAL.getValue(); + + public MysqlDialect() {} + + public MysqlDialect(String fieldIde) { + this.fieldIde = fieldIde; + } + @Override public String dialectName() { return "MySQL"; @@ -48,6 +57,11 @@ public JdbcDialectTypeMapper getJdbcDialectTypeMapper() { @Override public String quoteIdentifier(String identifier) { + return "`" + getFieldIde(identifier, fieldIde) + "`"; + } + + @Override + public String quoteDatabaseIdentifier(String identifier) { return "`" + identifier + "`"; } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java index 66df84205ed1..b3a456870cc3 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java @@ -40,7 +40,7 @@ public JdbcDialect create() { } @Override - public JdbcDialect create(@Nonnull String compatibleMode) { + public JdbcDialect create(@Nonnull String compatibleMode, String fieldIde) { if ("oracle".equalsIgnoreCase(compatibleMode)) { return new OracleDialect(); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java index 7edd935e780d..e8e583dc143c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; import java.sql.Connection; import java.sql.PreparedStatement; @@ -33,6 +34,13 @@ public class OracleDialect implements JdbcDialect { private static final int DEFAULT_ORACLE_FETCH_SIZE = 128; + public String fieldIde = FieldIdeEnum.ORIGINAL.getValue(); + + public OracleDialect(String fieldIde) { + this.fieldIde = fieldIde; + } + + public OracleDialect() {} @Override public String dialectName() { @@ -56,7 +64,18 @@ public JdbcDialectTypeMapper getJdbcDialectTypeMapper() { @Override public String quoteIdentifier(String identifier) { 
- return identifier; + if (identifier.contains(".")) { + String[] parts = identifier.split("\\."); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < parts.length - 1; i++) { + sb.append("\"").append(parts[i]).append("\"").append("."); + } + return sb.append("\"") + .append(getFieldIde(parts[parts.length - 1], fieldIde)) + .append("\"") + .toString(); + } + return "\"" + getFieldIde(identifier, fieldIde) + "\""; } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialectFactory.java index 168dc4d89022..121098c46147 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialectFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialectFactory.java @@ -22,6 +22,8 @@ import com.google.auto.service.AutoService; +import javax.annotation.Nonnull; + /** Factory for {@link OracleDialect}. 
*/ @AutoService(JdbcDialectFactory.class) public class OracleDialectFactory implements JdbcDialectFactory { @@ -34,4 +36,9 @@ public boolean acceptsURL(String url) { public JdbcDialect create() { return new OracleDialect(); } + + @Override + public JdbcDialect create(@Nonnull String compatibleMode, String fieldIde) { + return new OracleDialect(fieldIde); + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java index 1ce9c3a72b91..c747ba08c681 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java @@ -68,7 +68,6 @@ public class OracleTypeMapper implements JdbcDialectTypeMapper { private static final String ORACLE_RAW = "RAW"; private static final String ORACLE_LONG_RAW = "LONG RAW"; - @SuppressWarnings("checkstyle:MagicNumber") @Override public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) throws SQLException { @@ -87,9 +86,10 @@ public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) if (scale == 0) { if (precision <= 9) { return BasicType.INT_TYPE; - } - if (precision <= 18) { + } else if (precision <= 18) { return BasicType.LONG_TYPE; + } else if (precision <= 38) { + return new DecimalType(38, 0); } } return new DecimalType(38, 18); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/phoenix/PhoenixTypeMapper.java 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/phoenix/PhoenixTypeMapper.java index 32b72574e463..f97d363c977a 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/phoenix/PhoenixTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/phoenix/PhoenixTypeMapper.java @@ -73,7 +73,6 @@ public class PhoenixTypeMapper implements JdbcDialectTypeMapper { private static final String PHOENIX_BINARY = "BINARY"; private static final String PHOENIX_VARBINARY = "VARBINARY"; - @SuppressWarnings("checkstyle:MagicNumber") @Override public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) throws SQLException { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresDialect.java index b36a28a5a609..f206589af59e 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresDialect.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresDialect.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; import java.sql.Connection; import java.sql.PreparedStatement; @@ -33,6 +34,14 @@ public class PostgresDialect implements 
JdbcDialect { public static final int DEFAULT_POSTGRES_FETCH_SIZE = 128; + public String fieldIde = FieldIdeEnum.ORIGINAL.getValue(); + + public PostgresDialect() {} + + public PostgresDialect(String fieldIde) { + this.fieldIde = fieldIde; + } + @Override public String dialectName() { return "PostgreSQL"; @@ -88,4 +97,32 @@ public PreparedStatement creatPreparedStatement( } return statement; } + + @Override + public String tableIdentifier(String database, String tableName) { + // resolve pg database name upper or lower not recognised + return quoteDatabaseIdentifier(database) + "." + quoteIdentifier(tableName); + } + + @Override + public String quoteIdentifier(String identifier) { + if (identifier.contains(".")) { + String[] parts = identifier.split("\\."); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < parts.length - 1; i++) { + sb.append("\"").append(parts[i]).append("\"").append("."); + } + return sb.append("\"") + .append(getFieldIde(parts[parts.length - 1], fieldIde)) + .append("\"") + .toString(); + } + + return "\"" + getFieldIde(identifier, fieldIde) + "\""; + } + + @Override + public String quoteDatabaseIdentifier(String identifier) { + return "\"" + identifier + "\""; + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresDialectFactory.java index 857c85290df6..59dc0b45c682 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresDialectFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresDialectFactory.java @@ -39,10 +39,10 @@ public JdbcDialect create() { } @Override - public JdbcDialect create(@Nonnull String 
compatibleMode) { + public JdbcDialect create(@Nonnull String compatibleMode, String fieldIde) { if ("postgresLow".equalsIgnoreCase(compatibleMode)) { - return new PostgresLowDialect(); + return new PostgresLowDialect(fieldIde); } - return new PostgresDialect(); + return new PostgresDialect(fieldIde); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresJdbcRowConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresJdbcRowConverter.java index 2b7dc47a9e0b..13ace89a0461 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresJdbcRowConverter.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresJdbcRowConverter.java @@ -43,7 +43,6 @@ public String converterName() { } @Override - @SuppressWarnings("checkstyle:Indentation") public SeaTunnelRow toInternal(ResultSet rs, SeaTunnelRowType typeInfo) throws SQLException { Object[] fields = new Object[typeInfo.getTotalFields()]; for (int fieldIndex = 0; fieldIndex < typeInfo.getTotalFields(); fieldIndex++) { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresTypeMapper.java index 25004168d804..1d82f687c1b0 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psql/PostgresTypeMapper.java @@ -86,8 +86,9 @@ public 
class PostgresTypeMapper implements JdbcDialectTypeMapper { private static final String PG_CHARACTER_VARYING_ARRAY = "_varchar"; private static final String PG_GEOMETRY = "geometry"; private static final String PG_GEOGRAPHY = "geography"; + private static final String PG_JSON = "json"; + private static final String PG_JSONB = "jsonb"; - @SuppressWarnings("checkstyle:MagicNumber") @Override public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) throws SQLException { @@ -139,6 +140,8 @@ public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) case PG_TEXT: case PG_GEOMETRY: case PG_GEOGRAPHY: + case PG_JSON: + case PG_JSONB: return BasicType.STRING_TYPE; case PG_CHAR_ARRAY: case PG_CHARACTER_ARRAY: diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psqllow/PostgresLowDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psqllow/PostgresLowDialect.java index e367207ffa20..9100382628dc 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psqllow/PostgresLowDialect.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/psqllow/PostgresLowDialect.java @@ -22,6 +22,11 @@ import java.util.Optional; public class PostgresLowDialect extends PostgresDialect { + + public PostgresLowDialect(String fieldIde) { + this.fieldIde = fieldIde; + } + @Override public Optional getUpsertStatement( String database, String tableName, String[] fieldNames, String[] uniqueKeyFields) { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerDialect.java 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerDialect.java index 2121369e22ab..792c03bd7606 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerDialect.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerDialect.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; import java.util.Arrays; import java.util.List; @@ -27,6 +28,15 @@ import java.util.stream.Collectors; public class SqlServerDialect implements JdbcDialect { + + public String fieldIde = FieldIdeEnum.ORIGINAL.getValue(); + + public SqlServerDialect() {} + + public SqlServerDialect(String fieldIde) { + this.fieldIde = fieldIde; + } + @Override public String dialectName() { return "Sqlserver"; @@ -105,4 +115,26 @@ public Optional getUpsertStatement( return Optional.of(upsertSQL); } + + @Override + public String quoteIdentifier(String identifier) { + if (identifier.contains(".")) { + String[] parts = identifier.split("\\."); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < parts.length - 1; i++) { + sb.append("[").append(parts[i]).append("]").append("."); + } + return sb.append("[") + .append(getFieldIde(parts[parts.length - 1], fieldIde)) + .append("]") + .toString(); + } + + return "[" + getFieldIde(identifier, fieldIde) + "]"; + } + + @Override + public String quoteDatabaseIdentifier(String identifier) { + return "[" + identifier + "]"; + } } diff --git 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerDialectFactory.java index d8fce3c43c14..d7dae4efd573 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerDialectFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerDialectFactory.java @@ -22,6 +22,8 @@ import com.google.auto.service.AutoService; +import javax.annotation.Nonnull; + /** Factory for {@link SqlServerDialect}. */ @AutoService(JdbcDialectFactory.class) public class SqlServerDialectFactory implements JdbcDialectFactory { @@ -34,4 +36,9 @@ public boolean acceptsURL(String url) { public JdbcDialect create() { return new SqlServerDialect(); } + + @Override + public JdbcDialect create(@Nonnull String compatibleMode, String fieldIde) { + return new SqlServerDialect(fieldIde); + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlserverTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlserverTypeMapper.java index 4a8978cb247e..3cd04e5e051f 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlserverTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlserverTypeMapper.java @@ -73,7 +73,6 @@ public class SqlserverTypeMapper implements JdbcDialectTypeMapper { private static final String 
SQLSERVER_VARBINARY = "VARBINARY"; private static final String SQLSERVER_IMAGE = "IMAGE"; - @SuppressWarnings("checkstyle:MagicNumber") @Override public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) throws SQLException { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/tablestore/TablestoreTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/tablestore/TablestoreTypeMapper.java index 2a843418d1e5..3dcfa41ccf41 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/tablestore/TablestoreTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/tablestore/TablestoreTypeMapper.java @@ -49,7 +49,6 @@ public class TablestoreTypeMapper implements JdbcDialectTypeMapper { private static final String TABLESTORE_VARBINARY = "VARBINARY"; private static final String TABLESTORE_MEDIUMBLOB = "MEDIUMBLOB"; - @SuppressWarnings("checkstyle:MagicNumber") @Override public SeaTunnelDataType mapping(ResultSetMetaData metadata, int colIndex) throws SQLException { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatement.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatement.java index b8ba7e2fe105..29c98c793871 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatement.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatement.java @@ -47,8 +47,8 @@ import 
java.util.List; import java.util.Map; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; @RequiredArgsConstructor @Slf4j diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/split/JdbcNumericBetweenParametersProvider.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/split/JdbcNumericBetweenParametersProvider.java index ced1d2831f1a..4bf8834f6d5a 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/split/JdbcNumericBetweenParametersProvider.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/split/JdbcNumericBetweenParametersProvider.java @@ -21,8 +21,8 @@ import java.math.BigDecimal; import java.math.RoundingMode; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkState; /** * This query parameters generator is an helper class to parameterize from/to queries on a numeric diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/SemanticXidGenerator.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/SemanticXidGenerator.java index 39c72267c68d..578b5de80829 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/SemanticXidGenerator.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/SemanticXidGenerator.java @@ -25,7 +25,7 @@ import java.security.SecureRandom; import java.util.Arrays; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; /** * Generates {@link Xid} from: diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/XidImpl.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/XidImpl.java index 299ba28cdbfe..05dbe6d3c945 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/XidImpl.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/xa/XidImpl.java @@ -26,7 +26,7 @@ import java.util.Arrays; import java.util.Objects; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; /** * A simple {@link Xid} implementation that stores branch and global transaction identifiers as byte diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java index 60861891b576..74a9bd97bfe1 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java @@ -50,8 +50,8 @@ import java.util.List; import java.util.Optional; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkState; public class JdbcExactlyOnceSinkWriter implements SinkWriter { private static final Logger LOG = LoggerFactory.getLogger(JdbcExactlyOnceSinkWriter.class); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java index c23619b5aade..bbb776e486a5 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java @@ -38,10 +38,13 @@ import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSinkConfig; import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorException; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectLoader; +import 
org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; import org.apache.seatunnel.connectors.seatunnel.jdbc.state.JdbcAggregatedCommitInfo; import org.apache.seatunnel.connectors.seatunnel.jdbc.state.JdbcSinkState; import org.apache.seatunnel.connectors.seatunnel.jdbc.state.XidInfo; @@ -107,7 +110,10 @@ public void prepare(Config pluginConfig) throws PrepareFailException { this.dialect = JdbcDialectLoader.load( jdbcSinkConfig.getJdbcConnectionConfig().getUrl(), - jdbcSinkConfig.getJdbcConnectionConfig().getCompatibleMode()); + jdbcSinkConfig.getJdbcConnectionConfig().getCompatibleMode(), + config.get(JdbcOptions.FIELD_IDE) == null + ? null + : config.get(JdbcOptions.FIELD_IDE).getValue()); this.dataSaveMode = DataSaveMode.KEEP_SCHEMA_AND_DATA; } @@ -206,14 +212,21 @@ public void handleSaveMode(DataSaveMode saveMode) { catalogFactory.factoryIdentifier(), ReadonlyConfig.fromMap(new HashMap<>(catalogOptions)))) { catalog.open(); + FieldIdeEnum fieldIdeEnumEnum = config.get(JdbcOptions.FIELD_IDE); + String fieldIde = + fieldIdeEnumEnum == null + ? FieldIdeEnum.ORIGINAL.getValue() + : fieldIdeEnumEnum.getValue(); TablePath tablePath = TablePath.of( jdbcSinkConfig.getDatabase() + "." 
- + jdbcSinkConfig.getTable()); + + CatalogUtils.quoteTableIdentifier( + jdbcSinkConfig.getTable(), fieldIde)); if (!catalog.databaseExists(jdbcSinkConfig.getDatabase())) { catalog.createDatabase(tablePath, true); } + catalogTable.getOptions().put("fieldIde", fieldIde); if (!catalog.tableExists(tablePath)) { catalog.createTable(tablePath, catalogTable, true); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java index a26628ff3a43..d18ff0d7fdb1 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java @@ -30,9 +30,11 @@ import org.apache.seatunnel.api.table.factory.TableFactoryContext; import org.apache.seatunnel.api.table.factory.TableSinkFactory; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSinkConfig; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectLoader; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -45,7 +47,6 @@ import java.util.stream.Collectors; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.AUTO_COMMIT; -import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.BATCH_INTERVAL_MS; import static 
org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.BATCH_SIZE; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.COMPATIBLE_MODE; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.CONNECTION_CHECK_TIMEOUT_SEC; @@ -143,10 +144,12 @@ public TableSink createSink(TableFactoryContext context) { } final ReadonlyConfig options = config; JdbcSinkConfig sinkConfig = JdbcSinkConfig.of(config); + FieldIdeEnum fieldIdeEnum = config.get(JdbcOptions.FIELD_IDE); JdbcDialect dialect = JdbcDialectLoader.load( sinkConfig.getJdbcConnectionConfig().getUrl(), - sinkConfig.getJdbcConnectionConfig().getCompatibleMode()); + sinkConfig.getJdbcConnectionConfig().getCompatibleMode(), + fieldIdeEnum == null ? null : fieldIdeEnum.getValue()); CatalogTable finalCatalogTable = catalogTable; return () -> new JdbcSink( @@ -166,7 +169,6 @@ public OptionRule optionRule() { PASSWORD, CONNECTION_CHECK_TIMEOUT_SEC, BATCH_SIZE, - BATCH_INTERVAL_MS, IS_EXACTLY_ONCE, GENERATE_SINK_SQL, AUTO_COMMIT, diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java index 6b8c49bc0abf..1c5fb5a2b22a 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java @@ -19,8 +19,6 @@ import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.TablePath; -import org.apache.seatunnel.common.utils.JdbcUrlUtil; -import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql.MySqlCatalog; import org.junit.jupiter.api.Disabled; import 
org.junit.jupiter.api.Test; @@ -41,25 +39,10 @@ void testCatalog() { catalog.open(); - MySqlCatalog mySqlCatalog = - new MySqlCatalog( - "mysql", - "root", - "root@123", - JdbcUrlUtil.getUrlInfo("jdbc:mysql://127.0.0.1:33062/mingdongtest")); - - mySqlCatalog.open(); - - CatalogTable table1 = - mySqlCatalog.getTable(TablePath.of("mingdongtest", "all_types_table_02")); - List strings = catalog.listDatabases(); - System.out.println(strings); - - List strings1 = catalog.listTables("XE"); CatalogTable table = catalog.getTable(TablePath.of("XE", "TEST", "PG_TYPES_TABLE_CP1")); - catalog.createTableInternal(new TablePath("XE", "TEST", "TEST003"), table); + catalog.createTable(new TablePath("XE", "TEST", "TEST003"), table, false); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java index badab864fc3f..6ef4d9e65484 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java @@ -53,7 +53,7 @@ void testCatalog() { catalog.getTable(TablePath.of("st_test", "public", "all_types_table_02")); System.out.println("find table: " + table); - catalog.createTableInternal( - new TablePath("liulitest", "public", "all_types_table_02"), table); + catalog.createTable( + new TablePath("liulitest", "public", "all_types_table_02"), table, false); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java 
b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java index 3de5c65bf8d7..04e00f1de1a6 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java @@ -93,12 +93,12 @@ public void testBuild() { MysqlCreateTableSqlBuilder.builder(tablePath, catalogTable).build("mysql"); // create table sql is change; The old unit tests are no longer applicable String expect = - "CREATE TABLE IF NOT EXISTS test_table (\n" - + "\tid null NOT NULL COMMENT 'id', \n" - + "\tname null NOT NULL COMMENT 'name', \n" - + "\tage null NULL COMMENT 'age', \n" - + "\tcreateTime null NULL COMMENT 'createTime', \n" - + "\tlastUpdateTime null NULL COMMENT 'lastUpdateTime', \n" + "CREATE TABLE `test_table` (\n" + + "\t`id` null NOT NULL COMMENT 'id', \n" + + "\t`name` null NOT NULL COMMENT 'name', \n" + + "\t`age` null NULL COMMENT 'age', \n" + + "\t`createTime` null NULL COMMENT 'createTime', \n" + + "\t`lastUpdateTime` null NULL COMMENT 'lastUpdateTime', \n" + "\tPRIMARY KEY (`id`)\n" + ") COMMENT = 'User table';"; CONSOLE.println(expect); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/PostgresDialectFactoryTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/PostgresDialectFactoryTest.java index 79b1f11ac93b..90b980a69e15 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/PostgresDialectFactoryTest.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/PostgresDialectFactoryTest.java @@ -30,7 +30,7 @@ public class PostgresDialectFactoryTest { @Test public void testPostgresDialectCreate() { PostgresDialectFactory postgresDialectFactory = new PostgresDialectFactory(); - JdbcDialect postgresLow = postgresDialectFactory.create("postgresLow"); + JdbcDialect postgresLow = postgresDialectFactory.create("postgresLow", ""); String[] fields = {"id", "name", "age"}; String[] uniqueKeyField = {"id"}; Optional upsertStatement = diff --git a/seatunnel-connectors-v2/connector-kafka/pom.xml b/seatunnel-connectors-v2/connector-kafka/pom.xml index 0ce4bba6b171..7955ab3f5467 100644 --- a/seatunnel-connectors-v2/connector-kafka/pom.xml +++ b/seatunnel-connectors-v2/connector-kafka/pom.xml @@ -31,6 +31,7 @@ 3.2.0 + 1.6.4.Final @@ -61,6 +62,17 @@ seatunnel-format-compatible-debezium-json ${project.version} + + org.apache.seatunnel + seatunnel-format-compatible-connect-json + ${project.version} + + + org.apache.kafka + connect-json + ${kafka.client.version} + + diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/MessageFormat.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/MessageFormat.java index 1ef29f6322a3..07f9a38ddffe 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/MessageFormat.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/MessageFormat.java @@ -22,5 +22,6 @@ public enum MessageFormat { TEXT, CANAL_JSON, DEBEZIUM_JSON, - COMPATIBLE_DEBEZIUM_JSON + COMPATIBLE_DEBEZIUM_JSON, + COMPATIBLE_KAFKA_CONNECT_JSON } diff --git 
a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSource.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSource.java index 30878e82a2c4..802d7986a94c 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSource.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSource.java @@ -45,6 +45,7 @@ import org.apache.seatunnel.connectors.seatunnel.kafka.config.StartMode; import org.apache.seatunnel.connectors.seatunnel.kafka.exception.KafkaConnectorException; import org.apache.seatunnel.connectors.seatunnel.kafka.state.KafkaSourceState; +import org.apache.seatunnel.format.compatible.kafka.connect.json.CompatibleKafkaConnectDeserializationSchema; import org.apache.seatunnel.format.json.JsonDeserializationSchema; import org.apache.seatunnel.format.json.canal.CanalJsonDeserializationSchema; import org.apache.seatunnel.format.json.debezium.DebeziumJsonDeserializationSchema; @@ -268,6 +269,11 @@ private void setDeserialization(Config config) { .setIgnoreParseErrors(true) .build(); break; + case COMPATIBLE_KAFKA_CONNECT_JSON: + deserializationSchema = + new CompatibleKafkaConnectDeserializationSchema( + typeInfo, config, false, false); + break; case DEBEZIUM_JSON: boolean includeSchema = DEBEZIUM_RECORD_INCLUDE_SCHEMA.defaultValue(); if (config.hasPath(DEBEZIUM_RECORD_INCLUDE_SCHEMA.key())) { diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java index 226fded2409b..a2d3bae2b4d3 100644 --- 
a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.connectors.seatunnel.kafka.config.MessageFormatErrorHandleWay; import org.apache.seatunnel.connectors.seatunnel.kafka.exception.KafkaConnectorErrorCode; import org.apache.seatunnel.connectors.seatunnel.kafka.exception.KafkaConnectorException; +import org.apache.seatunnel.format.compatible.kafka.connect.json.CompatibleKafkaConnectDeserializationSchema; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; @@ -150,9 +151,18 @@ public void pollNext(Collector output) throws Exception { recordList) { try { - deserializationSchema.deserialize( - record.value(), output); - } catch (Exception e) { + if (deserializationSchema + instanceof + CompatibleKafkaConnectDeserializationSchema) { + ((CompatibleKafkaConnectDeserializationSchema) + deserializationSchema) + .deserialize( + record, output); + } else { + deserializationSchema.deserialize( + record.value(), output); + } + } catch (IOException e) { if (this.messageFormatErrorHandleWay == MessageFormatErrorHandleWay .SKIP) { diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java index f1de236cfa03..9ab9f92841de 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java @@ -312,7 
+312,6 @@ private synchronized void assignSplit() { pendingSplit.clear(); } - @SuppressWarnings("checkstyle:MagicNumber") private static int getSplitOwner(TopicPartition tp, int numReaders) { int startIndex = ((tp.topic().hashCode() * 31) & 0x7FFFFFFF) % numReaders; return (startIndex + tp.partition()) % numReaders; diff --git a/seatunnel-connectors-v2/connector-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/mongodb/source/config/MongodbReadOptions.java b/seatunnel-connectors-v2/connector-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/mongodb/source/config/MongodbReadOptions.java index f0020eb41ad8..faffe15bc343 100644 --- a/seatunnel-connectors-v2/connector-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/mongodb/source/config/MongodbReadOptions.java +++ b/seatunnel-connectors-v2/connector-mongodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/mongodb/source/config/MongodbReadOptions.java @@ -22,10 +22,10 @@ import java.io.Serializable; -import static com.google.common.base.Preconditions.checkArgument; import static org.apache.seatunnel.connectors.seatunnel.mongodb.config.MongodbConfig.CURSOR_NO_TIMEOUT; import static org.apache.seatunnel.connectors.seatunnel.mongodb.config.MongodbConfig.FETCH_SIZE; import static org.apache.seatunnel.connectors.seatunnel.mongodb.config.MongodbConfig.MAX_TIME_MIN; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; /** The configuration class for MongoDB source. 
*/ @EqualsAndHashCode diff --git a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarSource.java b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarSource.java index cf5972aa1fb0..ccd71d728b11 100644 --- a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarSource.java +++ b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarSource.java @@ -113,7 +113,6 @@ public String getPluginName() { return PulsarConfigUtil.IDENTIFIER; } - @SuppressWarnings("checkstyle:MagicNumber") @Override public void prepare(Config config) throws PrepareFailException { CheckResult result = diff --git a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/enumerator/PulsarSplitEnumerator.java b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/enumerator/PulsarSplitEnumerator.java index ddf6cb2d66e1..1d4f46849587 100644 --- a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/enumerator/PulsarSplitEnumerator.java +++ b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/enumerator/PulsarSplitEnumerator.java @@ -217,7 +217,6 @@ private void addPartitionSplitChangeToPendingAssignments( subscriptionName); } - @SuppressWarnings("checkstyle:MagicNumber") static int getSplitOwner(TopicPartition tp, int numReaders) { int startIndex = ((tp.getTopic().hashCode() * 31) & 0x7FFFFFFF) % numReaders; diff --git a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/enumerator/cursor/start/MessageIdStartCursor.java 
b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/enumerator/cursor/start/MessageIdStartCursor.java index b70c8a540a84..c539f7102dc8 100644 --- a/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/enumerator/cursor/start/MessageIdStartCursor.java +++ b/seatunnel-connectors-v2/connector-pulsar/src/main/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/enumerator/cursor/start/MessageIdStartCursor.java @@ -24,7 +24,7 @@ import org.apache.pulsar.client.api.PulsarClientException; import org.apache.pulsar.client.impl.MessageIdImpl; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; /** This cursor would left pulsar start consuming from a specific message id. */ public class MessageIdStartCursor implements StartCursor { diff --git a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisConfig.java b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisConfig.java index c777d2378273..511cbe4aa993 100644 --- a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisConfig.java +++ b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisConfig.java @@ -102,6 +102,12 @@ public enum HashKeyParseMode { .withDescription( "hash key parse mode, support all or kv, default value is all"); + public static final Option EXPIRE = + Options.key("expire") + .longType() + .defaultValue(-1L) + .withDescription("Set redis expiration time."); + public enum Format { JSON, // TEXT will be supported later diff --git 
a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisDataType.java b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisDataType.java index 64772b5381d3..a315e0cdae0c 100644 --- a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisDataType.java +++ b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisDataType.java @@ -30,8 +30,9 @@ public enum RedisDataType { KEY { @Override - public void set(Jedis jedis, String key, String value) { + public void set(Jedis jedis, String key, String value, long expire) { jedis.set(key, value); + expire(jedis, key, expire); } @Override @@ -41,9 +42,10 @@ public List get(Jedis jedis, String key) { }, HASH { @Override - public void set(Jedis jedis, String key, String value) { + public void set(Jedis jedis, String key, String value, long expire) { Map fieldsMap = JsonUtils.toMap(value); jedis.hset(key, fieldsMap); + expire(jedis, key, expire); } @Override @@ -54,8 +56,9 @@ public List get(Jedis jedis, String key) { }, LIST { @Override - public void set(Jedis jedis, String key, String value) { + public void set(Jedis jedis, String key, String value, long expire) { jedis.lpush(key, value); + expire(jedis, key, expire); } @Override @@ -65,8 +68,9 @@ public List get(Jedis jedis, String key) { }, SET { @Override - public void set(Jedis jedis, String key, String value) { + public void set(Jedis jedis, String key, String value, long expire) { jedis.sadd(key, value); + expire(jedis, key, expire); } @Override @@ -77,8 +81,9 @@ public List get(Jedis jedis, String key) { }, ZSET { @Override - public void set(Jedis jedis, String key, String value) { + public void set(Jedis jedis, String key, String value, long expire) { jedis.zadd(key, 1, value); + expire(jedis, key, expire); } @Override @@ -91,7 
+96,13 @@ public List get(Jedis jedis, String key) { return Collections.emptyList(); } - public void set(Jedis jedis, String key, String value) { + private static void expire(Jedis jedis, String key, long expire) { + if (expire > 0) { + jedis.expire(key, expire); + } + } + + public void set(Jedis jedis, String key, String value, long expire) { // do nothing } } diff --git a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisParameters.java b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisParameters.java index c8bb879d0f5b..8954b4da2a1f 100644 --- a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisParameters.java +++ b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/config/RedisParameters.java @@ -47,6 +47,7 @@ public class RedisParameters implements Serializable { private RedisConfig.RedisMode mode; private RedisConfig.HashKeyParseMode hashKeyParseMode; private List redisNodes = Collections.emptyList(); + private long expire = RedisConfig.EXPIRE.defaultValue(); public void buildWithConfig(Config config) { // set host @@ -89,6 +90,9 @@ public void buildWithConfig(Config config) { if (config.hasPath(RedisConfig.KEY_PATTERN.key())) { this.keysPattern = config.getString(RedisConfig.KEY_PATTERN.key()); } + if (config.hasPath(RedisConfig.EXPIRE.key())) { + this.expire = config.getLong(RedisConfig.EXPIRE.key()); + } // set redis data type try { String dataType = config.getString(RedisConfig.DATA_TYPE.key()); diff --git a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java index e68a893f79c3..22ae1568740e 100644 --- 
a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java +++ b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java @@ -41,7 +41,8 @@ public OptionRule optionRule() { RedisConfig.AUTH, RedisConfig.USER, RedisConfig.KEY_PATTERN, - RedisConfig.FORMAT) + RedisConfig.FORMAT, + RedisConfig.EXPIRE) .conditional(RedisConfig.MODE, RedisConfig.RedisMode.CLUSTER, RedisConfig.NODES) .build(); } diff --git a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkWriter.java b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkWriter.java index 657e3aaa5658..80b1449b9d6d 100644 --- a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkWriter.java +++ b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkWriter.java @@ -59,7 +59,8 @@ public void write(SeaTunnelRow element) throws IOException { } else { key = keyField; } - redisDataType.set(jedis, key, data); + long expire = redisParameters.getExpire(); + redisDataType.set(jedis, key, data, expire); } @Override diff --git a/seatunnel-connectors-v2/connector-rocketmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rocketmq/source/RocketMqSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-rocketmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rocketmq/source/RocketMqSourceSplitEnumerator.java index d933fe2e92bc..ce841a4bf080 100644 --- a/seatunnel-connectors-v2/connector-rocketmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rocketmq/source/RocketMqSourceSplitEnumerator.java +++ 
b/seatunnel-connectors-v2/connector-rocketmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rocketmq/source/RocketMqSourceSplitEnumerator.java @@ -77,7 +77,6 @@ public RocketMqSourceSplitEnumerator( this.discoveryIntervalMillis = discoveryIntervalMillis; } - @SuppressWarnings("checkstyle:MagicNumber") private static int getSplitOwner(MessageQueue messageQueue, int numReaders) { int startIndex = ((messageQueue.getQueueId() * 31) & 0x7FFFFFFF) % numReaders; return (startIndex + messageQueue.getQueueId()) % numReaders; diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCatalog.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCatalog.java index 7bf308b1ca8e..097dfa5b245d 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCatalog.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCatalog.java @@ -60,7 +60,7 @@ import java.util.Optional; import java.util.Set; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; public class StarRocksCatalog implements Catalog { diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksSinkManager.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksSinkManager.java index f4f37e584eb4..e7a1c8c2c5e8 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksSinkManager.java +++ 
b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksSinkManager.java @@ -22,7 +22,6 @@ import org.apache.seatunnel.connectors.seatunnel.starrocks.exception.StarRocksConnectorException; import com.google.common.base.Strings; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.extern.slf4j.Slf4j; import java.io.IOException; @@ -30,10 +29,6 @@ import java.util.ArrayList; import java.util.List; import java.util.UUID; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; @Slf4j public class StarRocksSinkManager { @@ -42,18 +37,14 @@ public class StarRocksSinkManager { private final List batchList; private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor; - private ScheduledExecutorService scheduler; - private ScheduledFuture scheduledFuture; private volatile boolean initialize; private volatile Exception flushException; private int batchRowCount = 0; private long batchBytesSize = 0; - private final Integer batchIntervalMs; public StarRocksSinkManager(SinkConfig sinkConfig, List fileNames) { this.sinkConfig = sinkConfig; this.batchList = new ArrayList<>(); - this.batchIntervalMs = sinkConfig.getBatchIntervalMs(); starrocksStreamLoadVisitor = new StarRocksStreamLoadVisitor(sinkConfig, fileNames); } @@ -62,26 +53,6 @@ private void tryInit() throws IOException { return; } initialize = true; - - if (batchIntervalMs != null) { - scheduler = - Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("StarRocks-sink-output-%s") - .build()); - scheduledFuture = - scheduler.scheduleAtFixedRate( - () -> { - try { - flush(); - } catch (IOException e) { - flushException = e; - } - }, - batchIntervalMs, - batchIntervalMs, - TimeUnit.MILLISECONDS); - } } public synchronized void write(String record) throws IOException 
{ @@ -98,11 +69,6 @@ public synchronized void write(String record) throws IOException { } public synchronized void close() throws IOException { - if (scheduledFuture != null) { - scheduledFuture.cancel(false); - scheduler.shutdown(); - } - flush(); } diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/source/StarRocksRowBatchReader.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/source/StarRocksRowBatchReader.java index 4a1927058943..2ea7e98c7e5a 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/source/StarRocksRowBatchReader.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/source/StarRocksRowBatchReader.java @@ -50,7 +50,7 @@ import java.util.ArrayList; import java.util.List; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; @Slf4j public class StarRocksRowBatchReader { diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/source/model/QueryPlan.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/source/model/QueryPlan.java index 07d5b1629fc6..4b11d6da4508 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/source/model/QueryPlan.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/source/model/QueryPlan.java @@ -33,7 +33,6 @@ public class QueryPlan implements Serializable { private int status; @JsonProperty("opaqued_query_plan") - 
@SuppressWarnings("checkstyle:MemberName") private String queryPlan; private Map partitions; diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SinkConfig.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SinkConfig.java index f5a2d0dc88c1..c1709b693903 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SinkConfig.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SinkConfig.java @@ -51,7 +51,6 @@ public enum StreamLoadFormat { private int batchMaxSize; private long batchMaxBytes; - private Integer batchIntervalMs; private int maxRetries; private int retryBackoffMultiplierMs; private int maxRetryBackoffMs; @@ -74,8 +73,6 @@ public static SinkConfig of(ReadonlyConfig config) { config.getOptional(StarRocksSinkOptions.LABEL_PREFIX).ifPresent(sinkConfig::setLabelPrefix); sinkConfig.setBatchMaxSize(config.get(StarRocksSinkOptions.BATCH_MAX_SIZE)); sinkConfig.setBatchMaxBytes(config.get(StarRocksSinkOptions.BATCH_MAX_BYTES)); - config.getOptional(StarRocksSinkOptions.BATCH_INTERVAL_MS) - .ifPresent(sinkConfig::setBatchIntervalMs); config.getOptional(StarRocksSinkOptions.MAX_RETRIES).ifPresent(sinkConfig::setMaxRetries); config.getOptional(StarRocksSinkOptions.RETRY_BACKOFF_MULTIPLIER_MS) .ifPresent(sinkConfig::setRetryBackoffMultiplierMs); diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SourceConfig.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SourceConfig.java index 34b3dc6a8774..10d0358a8f1b 100644 --- 
a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SourceConfig.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SourceConfig.java @@ -46,7 +46,6 @@ public SourceConfig( super(nodeUrls, username, password, database, table); } - @SuppressWarnings("checkstyle:MagicNumber") public static final Option MAX_RETRIES = Options.key("max_retries") .intType() @@ -62,28 +61,24 @@ public SourceConfig( public static final Option SCAN_FILTER = Options.key("scan_filter").stringType().defaultValue("").withDescription("SQL filter"); - @SuppressWarnings("checkstyle:MagicNumber") public static final Option SCAN_CONNECT_TIMEOUT = Options.key("scan_connect_timeout_ms") .intType() .defaultValue(1000) .withDescription("scan connect timeout"); - @SuppressWarnings("checkstyle:MagicNumber") public static final Option SCAN_BATCH_ROWS = Options.key("scan_batch_rows") .intType() .defaultValue(1024) .withDescription("scan batch rows"); - @SuppressWarnings("checkstyle:MagicNumber") public static final Option SCAN_KEEP_ALIVE_MIN = Options.key("scan_keep_alive_min") .intType() .defaultValue(10) .withDescription("Max keep alive time min"); - @SuppressWarnings("checkstyle:MagicNumber") public static final Option SCAN_QUERY_TIMEOUT_SEC = Options.key("scan_query_timeout_sec") .intType() diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java index 02918f0f96d7..1129d447162c 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java +++ 
b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java @@ -60,6 +60,7 @@ public interface StarRocksSinkOptions { .stringType() .defaultValue( "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (\n" + + "${rowtype_primary_key},\n" + "${rowtype_fields}\n" + ") ENGINE=OLAP\n" + " PRIMARY KEY (${rowtype_primary_key})\n" @@ -75,21 +76,14 @@ public interface StarRocksSinkOptions { .intType() .defaultValue(1024) .withDescription( - "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches batch_interval_ms, the data will be flushed into the StarRocks"); + "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches checkpoint.interval, the data will be flushed into the StarRocks"); Option BATCH_MAX_BYTES = Options.key("batch_max_bytes") .longType() .defaultValue((long) (5 * 1024 * 1024)) .withDescription( - "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches batch_interval_ms, the data will be flushed into the StarRocks"); - - Option BATCH_INTERVAL_MS = - Options.key("batch_interval_ms") - .intType() - .noDefaultValue() - .withDescription( - "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches batch_interval_ms, the data will be flushed into the StarRocks"); + "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches checkpoint.interval, the data will be flushed into the StarRocks"); Option MAX_RETRIES = Options.key("max_retries") diff --git 
a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/serialize/StarRocksJsonSerializer.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/serialize/StarRocksJsonSerializer.java index 0e6de2f60127..5bf15c533a15 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/serialize/StarRocksJsonSerializer.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/serialize/StarRocksJsonSerializer.java @@ -19,9 +19,10 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.api.table.type.SqlType; import org.apache.seatunnel.common.utils.JsonUtils; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.Map; public class StarRocksJsonSerializer extends StarRocksBaseSerializer @@ -38,10 +39,22 @@ public StarRocksJsonSerializer(SeaTunnelRowType seaTunnelRowType, boolean enable @Override public String serialize(SeaTunnelRow row) { - Map rowMap = new HashMap<>(row.getFields().length); + Map rowMap = new LinkedHashMap<>(row.getFields().length); for (int i = 0; i < row.getFields().length; i++) { - Object value = convert(seaTunnelRowType.getFieldType(i), row.getField(i)); + SqlType sqlType = seaTunnelRowType.getFieldType(i).getSqlType(); + Object value; + if (sqlType == SqlType.ARRAY + || sqlType == SqlType.MAP + || sqlType == SqlType.ROW + || sqlType == SqlType.MULTIPLE_ROW) { + // If the field type is complex type, we should keep the origin value. + // It will be transformed to json string in the next step + // JsonUtils.toJsonString(rowMap). 
+ value = row.getField(i); + } else { + value = convert(seaTunnelRowType.getFieldType(i), row.getField(i)); + } rowMap.put(seaTunnelRowType.getFieldName(i), value); } if (enableUpsertDelete) { diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java index cb0d086859bb..bbbc04eb20e5 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java @@ -27,6 +27,8 @@ import org.apache.commons.lang3.StringUtils; +import java.util.Comparator; +import java.util.List; import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; @@ -82,8 +84,14 @@ private static String mergeColumnInTemplate( Map columnMap = tableSchema.getColumns().stream() .collect(Collectors.toMap(Column::getName, Function.identity())); - for (String col : columnInTemplate.keySet()) { - CreateTableParser.ColumnInfo columnInfo = columnInTemplate.get(col); + List columnInfosInSeq = + columnInTemplate.values().stream() + .sorted( + Comparator.comparingInt( + CreateTableParser.ColumnInfo::getStartIndex)) + .collect(Collectors.toList()); + for (CreateTableParser.ColumnInfo columnInfo : columnInfosInSeq) { + String col = columnInfo.getName(); if (StringUtils.isEmpty(columnInfo.getInfo())) { if (columnMap.containsKey(col)) { Column column = columnMap.get(col); diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java 
b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java index 471be7001b68..c0159c5fd429 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java @@ -49,7 +49,6 @@ public OptionRule optionRule() { StarRocksSinkOptions.LABEL_PREFIX, StarRocksSinkOptions.BATCH_MAX_SIZE, StarRocksSinkOptions.BATCH_MAX_BYTES, - StarRocksSinkOptions.BATCH_INTERVAL_MS, StarRocksSinkOptions.MAX_RETRIES, StarRocksSinkOptions.MAX_RETRY_BACKOFF_MS, StarRocksSinkOptions.RETRY_BACKOFF_MULTIPLIER_MS, diff --git a/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/StarRocksCreateTableTest.java b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/StarRocksCreateTableTest.java index 22536ffd6849..b571deb68ad4 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/StarRocksCreateTableTest.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/StarRocksCreateTableTest.java @@ -22,8 +22,11 @@ import org.apache.seatunnel.api.table.catalog.PrimaryKey; import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.DecimalType; +import org.apache.seatunnel.api.table.type.LocalTimeType; import org.apache.seatunnel.connectors.seatunnel.starrocks.sink.StarRocksSaveModeUtil; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.util.ArrayList; @@ -70,4 +73,83 @@ public void test() { System.out.println(result); 
} + + @Test + public void testInSeq() { + + List columns = new ArrayList<>(); + + columns.add(PhysicalColumn.of("L_ORDERKEY", BasicType.INT_TYPE, null, false, null, "")); + columns.add(PhysicalColumn.of("L_PARTKEY", BasicType.INT_TYPE, null, false, null, "")); + columns.add(PhysicalColumn.of("L_SUPPKEY", BasicType.INT_TYPE, null, false, null, "")); + columns.add(PhysicalColumn.of("L_LINENUMBER", BasicType.INT_TYPE, null, false, null, "")); + columns.add(PhysicalColumn.of("L_QUANTITY", new DecimalType(15, 2), null, false, null, "")); + columns.add( + PhysicalColumn.of( + "L_EXTENDEDPRICE", new DecimalType(15, 2), null, false, null, "")); + columns.add(PhysicalColumn.of("L_DISCOUNT", new DecimalType(15, 2), null, false, null, "")); + columns.add(PhysicalColumn.of("L_TAX", new DecimalType(15, 2), null, false, null, "")); + columns.add( + PhysicalColumn.of("L_RETURNFLAG", BasicType.STRING_TYPE, null, false, null, "")); + columns.add( + PhysicalColumn.of("L_LINESTATUS", BasicType.STRING_TYPE, null, false, null, "")); + columns.add( + PhysicalColumn.of( + "L_SHIPDATE", LocalTimeType.LOCAL_DATE_TYPE, null, false, null, "")); + columns.add( + PhysicalColumn.of( + "L_COMMITDATE", LocalTimeType.LOCAL_DATE_TYPE, null, false, null, "")); + columns.add( + PhysicalColumn.of( + "L_RECEIPTDATE", LocalTimeType.LOCAL_DATE_TYPE, null, false, null, "")); + columns.add( + PhysicalColumn.of("L_SHIPINSTRUCT", BasicType.STRING_TYPE, null, false, null, "")); + columns.add(PhysicalColumn.of("L_SHIPMODE", BasicType.STRING_TYPE, null, false, null, "")); + columns.add(PhysicalColumn.of("L_COMMENT", BasicType.STRING_TYPE, null, false, null, "")); + + String result = + StarRocksSaveModeUtil.fillingCreateSql( + "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (\n" + + "`L_COMMITDATE`,\n" + + "${rowtype_primary_key},\n" + + "L_SUPPKEY BIGINT NOT NULL,\n" + + "${rowtype_fields}\n" + + ") ENGINE=OLAP\n" + + " PRIMARY KEY (L_COMMITDATE, ${rowtype_primary_key}, L_SUPPKEY)\n" + + "DISTRIBUTED 
BY HASH (${rowtype_primary_key})" + + "PROPERTIES (\n" + + " \"replication_num\" = \"1\" \n" + + ")", + "tpch", + "lineitem", + TableSchema.builder() + .primaryKey( + PrimaryKey.of( + "", Arrays.asList("L_ORDERKEY", "L_LINENUMBER"))) + .columns(columns) + .build()); + String expected = + "CREATE TABLE IF NOT EXISTS `tpch`.`lineitem` (\n" + + "`L_COMMITDATE` DATE NOT NULL ,\n" + + "`L_ORDERKEY` INT NOT NULL ,`L_LINENUMBER` INT NOT NULL ,\n" + + "L_SUPPKEY BIGINT NOT NULL,\n" + + "`L_PARTKEY` INT NOT NULL ,\n" + + "`L_QUANTITY` Decimal(15, 2) NOT NULL ,\n" + + "`L_EXTENDEDPRICE` Decimal(15, 2) NOT NULL ,\n" + + "`L_DISCOUNT` Decimal(15, 2) NOT NULL ,\n" + + "`L_TAX` Decimal(15, 2) NOT NULL ,\n" + + "`L_RETURNFLAG` STRING NOT NULL ,\n" + + "`L_LINESTATUS` STRING NOT NULL ,\n" + + "`L_SHIPDATE` DATE NOT NULL ,\n" + + "`L_RECEIPTDATE` DATE NOT NULL ,\n" + + "`L_SHIPINSTRUCT` STRING NOT NULL ,\n" + + "`L_SHIPMODE` STRING NOT NULL ,\n" + + "`L_COMMENT` STRING NOT NULL \n" + + ") ENGINE=OLAP\n" + + " PRIMARY KEY (L_COMMITDATE, `L_ORDERKEY`,`L_LINENUMBER`, L_SUPPKEY)\n" + + "DISTRIBUTED BY HASH (`L_ORDERKEY`,`L_LINENUMBER`)PROPERTIES (\n" + + " \"replication_num\" = \"1\" \n" + + ")"; + Assertions.assertEquals(result, expected); + } } diff --git a/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/serialize/StarRocksJsonSerializerTest.java b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/serialize/StarRocksJsonSerializerTest.java new file mode 100644 index 000000000000..6e0d9476441d --- /dev/null +++ b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/serialize/StarRocksJsonSerializerTest.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.starrocks.serialize; + +import org.apache.seatunnel.api.table.type.ArrayType; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.MapType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Collections; + +public class StarRocksJsonSerializerTest { + + @Test + public void serialize() { + String[] filedNames = {"id", "name", "array", "map"}; + SeaTunnelDataType[] filedTypes = { + BasicType.LONG_TYPE, + BasicType.STRING_TYPE, + ArrayType.STRING_ARRAY_TYPE, + new MapType<>(BasicType.STRING_TYPE, BasicType.STRING_TYPE) + }; + + SeaTunnelRowType seaTunnelRowType = new SeaTunnelRowType(filedNames, filedTypes); + StarRocksJsonSerializer starRocksJsonSerializer = + new StarRocksJsonSerializer(seaTunnelRowType, false); + Object[] fields = { + 1, "Tom", new String[] {"tag1", "tag2"}, Collections.singletonMap("key1", "value1") + }; + SeaTunnelRow seaTunnelRow = new SeaTunnelRow(fields); + String jsonString = starRocksJsonSerializer.serialize(seaTunnelRow); + 
Assertions.assertEquals( + "{\"id\":1,\"name\":\"Tom\",\"array\":[\"tag1\",\"tag2\"],\"map\":{\"key1\":\"value1\"}}", + jsonString); + } +} diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreConfig.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreConfig.java index f64eb8473b0c..3e1714c55161 100644 --- a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreConfig.java +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreConfig.java @@ -50,11 +50,6 @@ public class TablestoreConfig implements Serializable { .stringType() .defaultValue("25") .withDescription(" Tablestore batch_size"); - public static final Option BATCH_INTERVAL_MS = - Options.key("batch_interval_ms") - .stringType() - .defaultValue("1000") - .withDescription(" Tablestore batch_interval_ms"); public static final Option PRIMARY_KEYS = Options.key("primary_keys") .stringType() diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreOptions.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreOptions.java index ba6c00893956..7b2aa6bae679 100644 --- a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreOptions.java +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreOptions.java @@ -25,7 +25,6 @@ import java.io.Serializable; import java.util.List; -import static 
org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig.BATCH_INTERVAL_MS; import static org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig.BATCH_SIZE; @Data @@ -45,7 +44,6 @@ public class TablestoreOptions implements Serializable { private List primaryKeys; public int batchSize = Integer.parseInt(BATCH_SIZE.defaultValue()); - public int batchIntervalMs = Integer.parseInt(BATCH_INTERVAL_MS.defaultValue()); public TablestoreOptions(Config config) { this.endpoint = config.getString(TablestoreConfig.END_POINT.key()); @@ -58,8 +56,5 @@ public TablestoreOptions(Config config) { if (config.hasPath(BATCH_SIZE.key())) { this.batchSize = config.getInt(BATCH_SIZE.key()); } - if (config.hasPath(TablestoreConfig.BATCH_INTERVAL_MS.key())) { - this.batchIntervalMs = config.getInt(TablestoreConfig.BATCH_INTERVAL_MS.key()); - } } } diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreSinkClient.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreSinkClient.java index e3b6f2fbdf31..0637b9b038cf 100644 --- a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreSinkClient.java +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreSinkClient.java @@ -27,22 +27,15 @@ import com.alicloud.openservices.tablestore.model.BatchWriteRowRequest; import com.alicloud.openservices.tablestore.model.BatchWriteRowResponse; import com.alicloud.openservices.tablestore.model.RowPutChange; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.extern.slf4j.Slf4j; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.Executors; -import 
java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; @Slf4j public class TablestoreSinkClient { private final TablestoreOptions tablestoreOptions; - private ScheduledExecutorService scheduler; - private ScheduledFuture scheduledFuture; private volatile boolean initialize; private volatile Exception flushException; private SyncClient syncClient; @@ -64,24 +57,6 @@ private void tryInit() throws IOException { tablestoreOptions.getAccessKeySecret(), tablestoreOptions.getInstanceName()); - scheduler = - Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("Tablestore-sink-output-%s") - .build()); - scheduledFuture = - scheduler.scheduleAtFixedRate( - () -> { - try { - flush(); - } catch (IOException e) { - flushException = e; - } - }, - tablestoreOptions.getBatchIntervalMs(), - tablestoreOptions.getBatchIntervalMs(), - TimeUnit.MILLISECONDS); - initialize = true; } @@ -96,17 +71,13 @@ public void write(RowPutChange rowPutChange) throws IOException { } public void close() throws IOException { - if (scheduledFuture != null) { - scheduledFuture.cancel(false); - scheduler.shutdown(); - } if (syncClient != null) { flush(); syncClient.shutdown(); } } - synchronized void flush() throws IOException { + synchronized void flush() { checkFlushException(); if (batchList.isEmpty()) { return; diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreSinkFactory.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreSinkFactory.java index efe39a08c4a6..674f641ad648 100644 --- a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreSinkFactory.java +++ 
b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreSinkFactory.java @@ -26,7 +26,6 @@ import static org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig.ACCESS_KEY_ID; import static org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig.ACCESS_KEY_SECRET; -import static org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig.BATCH_INTERVAL_MS; import static org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig.BATCH_SIZE; import static org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig.END_POINT; import static org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig.INSTANCE_NAME; @@ -51,7 +50,7 @@ public OptionRule optionRule() { ACCESS_KEY_SECRET, PRIMARY_KEYS, CatalogTableUtil.SCHEMA) - .optional(BATCH_INTERVAL_MS, BATCH_SIZE) + .optional(BATCH_SIZE) .build(); } } diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreWriter.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreWriter.java index 929a421f7f5c..22bfe1be27f4 100644 --- a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreWriter.java +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/sink/TablestoreWriter.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.connectors.seatunnel.tablestore.serialize.SeaTunnelRowSerializer; import java.io.IOException; +import java.util.Optional; public class TablestoreWriter extends AbstractSinkWriter { @@ -46,4 +47,10 @@ public void write(SeaTunnelRow element) throws IOException { public void close() throws IOException { tablestoreSinkClient.close(); } + + 
@Override + public Optional prepareCommit() { + tablestoreSinkClient.flush(); + return super.prepareCommit(); + } } diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/sink/TDengineSinkWriter.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/sink/TDengineSinkWriter.java index 34c773aef081..6a069d531ae0 100644 --- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/sink/TDengineSinkWriter.java +++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/sink/TDengineSinkWriter.java @@ -81,7 +81,6 @@ public TDengineSinkWriter(Config pluginConfig, SeaTunnelRowType seaTunnelRowType @SneakyThrows @Override - @SuppressWarnings("checkstyle:RegexpSingleline") public void write(SeaTunnelRow element) { final ArrayList tags = Lists.newArrayList(); for (int i = element.getArity() - tagsNum; i < element.getArity(); i++) { diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/typemapper/TDengineTypeMapper.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/typemapper/TDengineTypeMapper.java index c9a0751109d9..a32b1af02140 100644 --- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/typemapper/TDengineTypeMapper.java +++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/typemapper/TDengineTypeMapper.java @@ -80,7 +80,6 @@ public class TDengineTypeMapper { private static final String TDENGINE_VARBINARY = "VARBINARY"; private static final String TDENGINE_GEOMETRY = "GEOMETRY"; - @SuppressWarnings("checkstyle:MagicNumber") public static SeaTunnelDataType mapping(String tdengineType) { switch 
(tdengineType) { case TDENGINE_BIT: diff --git a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/AbstractCommandArgs.java b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/AbstractCommandArgs.java index 9b818ca95f61..ada15490f0a1 100644 --- a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/AbstractCommandArgs.java +++ b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/AbstractCommandArgs.java @@ -41,6 +41,7 @@ public abstract class AbstractCommandArgs extends CommandArgs { /** user-defined parameters */ @Parameter( names = {"-i", "--variable"}, + splitter = ParameterSplitter.class, description = "Variable substitution, such as -i city=beijing, or -i date=20190318") protected List variables = Collections.emptyList(); diff --git a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/ParameterSplitter.java b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/ParameterSplitter.java new file mode 100644 index 000000000000..29263d417e7c --- /dev/null +++ b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/ParameterSplitter.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.seatunnel.core.starter.command; + +import com.beust.jcommander.converters.IParameterSplitter; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class ParameterSplitter implements IParameterSplitter { + + @Override + public List split(String value) { + if (!value.contains(",")) { + return Collections.singletonList(value); + } + + List result = new ArrayList<>(); + StringBuilder currentToken = new StringBuilder(); + boolean insideBrackets = false; + + for (char c : value.toCharArray()) { + if (c == '[') { + insideBrackets = true; + } else if (c == ']') { + insideBrackets = false; + } + + if (c == ',' && !insideBrackets) { + result.add(currentToken.toString().trim()); + currentToken = new StringBuilder(); + } else { + currentToken.append(c); + } + } + + if (currentToken.length() > 0) { + result.add(currentToken.toString().trim()); + } + + return result; + } +} diff --git a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/utils/ConfigBuilder.java b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/utils/ConfigBuilder.java index ed66b550a046..ad063acac8a6 100644 --- a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/utils/ConfigBuilder.java +++ b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/utils/ConfigBuilder.java @@ -69,6 +69,12 @@ public static Config of(@NonNull Path filePath) { return config; } + public static Config of(@NonNull Map 
objectMap) { + log.info("Loading config file from objectMap"); + Config config = ConfigFactory.parseMap(objectMap); + return ConfigShadeUtils.decryptConfig(config); + } + public static Config of(@NonNull ConfigAdapter configAdapter, @NonNull Path filePath) { log.info("With config adapter spi {}", configAdapter.getClass().getName()); try { diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/bin/start-seatunnel-flink-13-connector-v2.cmd b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/bin/start-seatunnel-flink-13-connector-v2.cmd new file mode 100644 index 000000000000..c1cbc1d9556a --- /dev/null +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/bin/start-seatunnel-flink-13-connector-v2.cmd @@ -0,0 +1,71 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +setlocal enabledelayedexpansion + +rem resolve links - %0 may be a softlink +set "PRG=%~f0" +:resolve_loop +rem Get the parent directory of the script +set "PRG_DIR=%~dp0" +rem Change current drive and directory to %PRG_DIR% and execute the 'dir' command, which will fail if %PRG% is not a valid file. 
+cd /d "%PRG_DIR%" || ( + echo Cannot determine the script's current directory. + exit /b 1 +) + +set "APP_DIR=%~dp0" +set "CONF_DIR=%APP_DIR%\config" +set "APP_JAR=%APP_DIR%\starter\seatunnel-flink-13-starter.jar" +set "APP_MAIN=org.apache.seatunnel.core.starter.flink.FlinkStarter" + +if exist "%CONF_DIR%\seatunnel-env.cmd" ( + call "%CONF_DIR%\seatunnel-env.cmd" +) + +if "%~1"=="" ( + set "args=-h" +) else ( + set "args=%*" +) + +set "JAVA_OPTS=" +rem Log4j2 Config +if exist "%CONF_DIR%\log4j2.properties" ( + set "JAVA_OPTS=!JAVA_OPTS! -Dlog4j2.configurationFile=%CONF_DIR%\log4j2.properties" + set "JAVA_OPTS=!JAVA_OPTS! -Dseatunnel.logs.path=%APP_DIR%\logs" + set "JAVA_OPTS=!JAVA_OPTS! -Dseatunnel.logs.file_name=seatunnel-flink-starter" +) + +set "CLASS_PATH=%APP_DIR%\starter\logging\*;%APP_JAR%" + +for /f "delims=" %%i in ('java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%') do ( + set "CMD=%%i" + setlocal disabledelayedexpansion + if !errorlevel! equ 234 ( + echo !CMD! + endlocal + exit /b 0 + ) else if !errorlevel! equ 0 ( + echo Execute SeaTunnel Flink Job: !CMD! + endlocal + call !CMD! + ) else ( + echo !CMD! + endlocal + exit /b !errorlevel! 
+ ) +) diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java index 078c29bbfafe..5dc1d32cef5c 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java @@ -44,7 +44,6 @@ public class FlinkStarter implements Starter { this.appJar = Common.appStarterDir().resolve(APP_JAR_NAME).toString(); } - @SuppressWarnings("checkstyle:RegexpSingleline") public static void main(String[] args) { FlinkStarter flinkStarter = new FlinkStarter(args); System.out.println(String.join(" ", flinkStarter.buildCommands())); diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java index 7fb75064a4c8..996c9698fb00 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java @@ -265,7 +265,10 @@ private void setCheckpoint() { } } - if (config.hasPath(ConfigKeyName.CHECKPOINT_TIMEOUT)) { + if (config.hasPath(EnvCommonOptions.CHECKPOINT_TIMEOUT.key())) { + long timeout = config.getLong(EnvCommonOptions.CHECKPOINT_TIMEOUT.key()); + checkpointConfig.setCheckpointTimeout(timeout); + } else if 
(config.hasPath(ConfigKeyName.CHECKPOINT_TIMEOUT)) { long timeout = config.getLong(ConfigKeyName.CHECKPOINT_TIMEOUT); checkpointConfig.setCheckpointTimeout(timeout); } @@ -313,19 +316,22 @@ private void setCheckpoint() { } } - public void registerResultTable(Config config, DataStream dataStream) { - if (config.hasPath(RESULT_TABLE_NAME)) { - String name = config.getString(RESULT_TABLE_NAME); - StreamTableEnvironment tableEnvironment = this.getStreamTableEnvironment(); - if (!TableUtil.tableExists(tableEnvironment, name)) { + public void registerResultTable( + Config config, DataStream dataStream, String name, Boolean isAppend) { + StreamTableEnvironment tableEnvironment = this.getStreamTableEnvironment(); + if (!TableUtil.tableExists(tableEnvironment, name)) { + if (isAppend) { if (config.hasPath("field_name")) { String fieldName = config.getString("field_name"); tableEnvironment.registerDataStream(name, dataStream, fieldName); - } else { - tableEnvironment.registerDataStream(name, dataStream); + return; } + tableEnvironment.registerDataStream(name, dataStream); + return; } } + tableEnvironment.createTemporaryView( + name, tableEnvironment.fromChangelogStream(dataStream)); } public static FlinkRuntimeEnvironment getInstance(Config config) { diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-15-starter/src/main/bin/start-seatunnel-flink-15-connector-v2.cmd b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-15-starter/src/main/bin/start-seatunnel-flink-15-connector-v2.cmd new file mode 100644 index 000000000000..ed4c1f6979e1 --- /dev/null +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-15-starter/src/main/bin/start-seatunnel-flink-15-connector-v2.cmd @@ -0,0 +1,71 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. 
+rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +setlocal enabledelayedexpansion + +rem resolve links - %0 may be a softlink +set "PRG=%~f0" +:resolve_loop +rem Get the parent directory of the script +set "PRG_DIR=%~dp0" +rem Change current drive and directory to %PRG_DIR% and execute the 'dir' command, which will fail if %PRG% is not a valid file. +cd /d "%PRG_DIR%" || ( + echo Cannot determine the script's current directory. + exit /b 1 +) + +set "APP_DIR=%~dp0" +set "CONF_DIR=%APP_DIR%\config" +set "APP_JAR=%APP_DIR%\starter\seatunnel-flink-15-starter.jar" +set "APP_MAIN=org.apache.seatunnel.core.starter.flink.FlinkStarter" + +if exist "%CONF_DIR%\seatunnel-env.cmd" ( + call "%CONF_DIR%\seatunnel-env.cmd" +) + +if "%~1"=="" ( + set "args=-h" +) else ( + set "args=%*" +) + +set "JAVA_OPTS=" +rem Log4j2 Config +if exist "%CONF_DIR%\log4j2.properties" ( + set "JAVA_OPTS=!JAVA_OPTS! -Dlog4j2.configurationFile=%CONF_DIR%\log4j2.properties" + set "JAVA_OPTS=!JAVA_OPTS! -Dseatunnel.logs.path=%APP_DIR%\logs" + set "JAVA_OPTS=!JAVA_OPTS! -Dseatunnel.logs.file_name=seatunnel-flink-starter" +) + +set "CLASS_PATH=%APP_DIR%\starter\logging\*;%APP_JAR%" + +for /f "delims=" %%i in ('java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%') do ( + set "CMD=%%i" + setlocal disabledelayedexpansion + if !errorlevel! equ 234 ( + echo !CMD! + endlocal + exit /b 0 + ) else if !errorlevel! equ 0 ( + echo Execute SeaTunnel Flink Job: !CMD! 
+ endlocal + call !CMD! + ) else ( + echo !CMD! + endlocal + exit /b !errorlevel! + ) +) diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java index 2f9021c68bd7..7373cb58ed53 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/FlinkStarter.java @@ -44,7 +44,6 @@ public class FlinkStarter implements Starter { this.appJar = Common.appStarterDir().resolve(APP_JAR_NAME).toString(); } - @SuppressWarnings("checkstyle:RegexpSingleline") public static void main(String[] args) { FlinkStarter flinkStarter = new FlinkStarter(args); System.out.println(String.join(" ", flinkStarter.buildCommands())); diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkAbstractPluginExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkAbstractPluginExecuteProcessor.java index e9d36ba068e6..6c61f61b9575 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkAbstractPluginExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkAbstractPluginExecuteProcessor.java @@ -31,15 +31,19 @@ import java.net.URL; import java.net.URLClassLoader; +import java.util.HashMap; import java.util.List; import java.util.Optional; import 
java.util.function.BiConsumer; +import static org.apache.seatunnel.api.common.CommonOptions.RESULT_TABLE_NAME; + public abstract class FlinkAbstractPluginExecuteProcessor implements PluginExecuteProcessor, FlinkRuntimeEnvironment> { protected static final String ENGINE_TYPE = "seatunnel"; protected static final String PLUGIN_NAME = "plugin_name"; protected static final String SOURCE_TABLE_NAME = "source_table_name"; + protected static HashMap isAppendMap = new HashMap<>(); protected static final BiConsumer ADD_URL_TO_CLASSLOADER = (classLoader, url) -> { @@ -76,14 +80,41 @@ protected Optional> fromSourceTable(Config pluginConfig) { if (pluginConfig.hasPath(SOURCE_TABLE_NAME)) { StreamTableEnvironment tableEnvironment = flinkRuntimeEnvironment.getStreamTableEnvironment(); - Table table = tableEnvironment.from(pluginConfig.getString(SOURCE_TABLE_NAME)); - return Optional.ofNullable(TableUtil.tableToDataStream(tableEnvironment, table, true)); + String tableName = pluginConfig.getString(SOURCE_TABLE_NAME); + Table table = tableEnvironment.from(tableName); + return Optional.ofNullable( + TableUtil.tableToDataStream( + tableEnvironment, table, isAppendMap.getOrDefault(tableName, true))); } return Optional.empty(); } protected void registerResultTable(Config pluginConfig, DataStream dataStream) { - flinkRuntimeEnvironment.registerResultTable(pluginConfig, dataStream); + if (pluginConfig.hasPath(RESULT_TABLE_NAME.key())) { + String resultTable = pluginConfig.getString(RESULT_TABLE_NAME.key()); + if (pluginConfig.hasPath(SOURCE_TABLE_NAME)) { + String sourceTable = pluginConfig.getString(SOURCE_TABLE_NAME); + flinkRuntimeEnvironment.registerResultTable( + pluginConfig, + dataStream, + resultTable, + isAppendMap.getOrDefault(sourceTable, true)); + registerAppendStream(pluginConfig); + return; + } + flinkRuntimeEnvironment.registerResultTable( + pluginConfig, + dataStream, + resultTable, + isAppendMap.getOrDefault(resultTable, true)); + } + } + + protected void 
registerAppendStream(Config pluginConfig) { + if (pluginConfig.hasPath(RESULT_TABLE_NAME.key())) { + String tableName = pluginConfig.getString(RESULT_TABLE_NAME.key()); + isAppendMap.put(tableName, false); + } } protected abstract List initializePlugins( diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkExecution.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkExecution.java index a3282cc4a1e2..5a4050d884d7 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkExecution.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkExecution.java @@ -26,6 +26,7 @@ import org.apache.seatunnel.common.Constants; import org.apache.seatunnel.common.config.Common; import org.apache.seatunnel.common.config.TypesafeConfigUtils; +import org.apache.seatunnel.common.constants.JobMode; import org.apache.seatunnel.common.utils.SeaTunnelException; import org.apache.seatunnel.core.starter.exception.TaskExecuteException; import org.apache.seatunnel.core.starter.execution.PluginExecuteProcessor; @@ -33,6 +34,7 @@ import org.apache.seatunnel.core.starter.execution.TaskExecution; import org.apache.seatunnel.core.starter.flink.FlinkStarter; +import org.apache.flink.api.common.RuntimeExecutionMode; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.types.Row; @@ -111,6 +113,12 @@ public void execute() throws TaskExecuteException { "Flink Execution Plan: {}", flinkRuntimeEnvironment.getStreamExecutionEnvironment().getExecutionPlan()); log.info("Flink job name: {}", flinkRuntimeEnvironment.getJobName()); + if (!flinkRuntimeEnvironment.isStreaming()) { + flinkRuntimeEnvironment 
+ .getStreamExecutionEnvironment() + .setRuntimeMode(RuntimeExecutionMode.BATCH); + log.info("Flink job Mode: {}", JobMode.BATCH); + } try { flinkRuntimeEnvironment .getStreamExecutionEnvironment() diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java index 4b5bef07cb05..12168921d8c8 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java @@ -265,7 +265,10 @@ private void setCheckpoint() { } } - if (config.hasPath(ConfigKeyName.CHECKPOINT_TIMEOUT)) { + if (config.hasPath(EnvCommonOptions.CHECKPOINT_TIMEOUT.key())) { + long timeout = config.getLong(EnvCommonOptions.CHECKPOINT_TIMEOUT.key()); + checkpointConfig.setCheckpointTimeout(timeout); + } else if (config.hasPath(ConfigKeyName.CHECKPOINT_TIMEOUT)) { long timeout = config.getLong(ConfigKeyName.CHECKPOINT_TIMEOUT); checkpointConfig.setCheckpointTimeout(timeout); } @@ -313,19 +316,22 @@ private void setCheckpoint() { } } - public void registerResultTable(Config config, DataStream dataStream) { - if (config.hasPath(RESULT_TABLE_NAME)) { - String name = config.getString(RESULT_TABLE_NAME); - StreamTableEnvironment tableEnvironment = this.getStreamTableEnvironment(); - if (!TableUtil.tableExists(tableEnvironment, name)) { + public void registerResultTable( + Config config, DataStream dataStream, String name, Boolean isAppend) { + StreamTableEnvironment tableEnvironment = this.getStreamTableEnvironment(); + if (!TableUtil.tableExists(tableEnvironment, 
name)) { + if (isAppend) { if (config.hasPath("field_name")) { String fieldName = config.getString("field_name"); tableEnvironment.registerDataStream(name, dataStream, fieldName); - } else { - tableEnvironment.registerDataStream(name, dataStream); + return; } + tableEnvironment.registerDataStream(name, dataStream); + return; } } + tableEnvironment.createTemporaryView( + name, tableEnvironment.fromChangelogStream(dataStream)); } public static FlinkRuntimeEnvironment getInstance(Config config) { diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java index a3897a526e90..f3ebdd043786 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java @@ -65,20 +65,23 @@ public List> execute(List> upstreamDataStreams) List> sources = new ArrayList<>(); for (int i = 0; i < plugins.size(); i++) { SeaTunnelSource internalSource = plugins.get(i); + Config pluginConfig = pluginConfigs.get(i); BaseSeaTunnelSourceFunction sourceFunction; if (internalSource instanceof SupportCoordinate) { sourceFunction = new SeaTunnelCoordinatedSource(internalSource); + registerAppendStream(pluginConfig); } else { sourceFunction = new SeaTunnelParallelSource(internalSource); } + boolean bounded = + internalSource.getBoundedness() + == org.apache.seatunnel.api.source.Boundedness.BOUNDED; DataStreamSource sourceStream = addSource( executionEnvironment, sourceFunction, "SeaTunnel " + internalSource.getClass().getSimpleName(), - 
internalSource.getBoundedness() - == org.apache.seatunnel.api.source.Boundedness.BOUNDED); - Config pluginConfig = pluginConfigs.get(i); + bounded); if (pluginConfig.hasPath(CommonOptions.PARALLELISM.key())) { int parallelism = pluginConfig.getInt(CommonOptions.PARALLELISM.key()); sourceStream.setParallelism(parallelism); diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/utils/TableUtil.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/utils/TableUtil.java index ca1603cdf997..aad97518f4b5 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/utils/TableUtil.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/utils/TableUtil.java @@ -37,11 +37,9 @@ public static DataStream tableToDataStream( if (isAppend) { return tableEnvironment.toAppendStream(table, typeInfo); } - return tableEnvironment - .toRetractStream(table, typeInfo) - .filter(row -> row.f0) - .map(row -> row.f1) - .returns(typeInfo); + DataStream dataStream = tableEnvironment.toChangelogStream(table); + dataStream.getTransformation().setOutputType(typeInfo); + return dataStream; } public static boolean tableExists(TableEnvironment tableEnvironment, String name) { diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/bin/start-seatunnel-spark-2-connector-v2.cmd b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/bin/start-seatunnel-spark-2-connector-v2.cmd new file mode 100644 index 000000000000..b2671671383a --- /dev/null +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/bin/start-seatunnel-spark-2-connector-v2.cmd @@ -0,0 +1,71 @@ +@echo off +rem Licensed to the Apache Software Foundation 
(ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +setlocal enabledelayedexpansion + +rem resolve links - %0 may be a softlink +set "PRG=%~f0" +:resolve_loop +rem Get the parent directory of the script +set "PRG_DIR=%~dp0" +rem Change current drive and directory to %PRG_DIR% and execute the 'dir' command, which will fail if %PRG% is not a valid file. +cd /d "%PRG_DIR%" || ( + echo Cannot determine the script's current directory. + exit /b 1 +) + +set "APP_DIR=%~dp0" +set "CONF_DIR=%APP_DIR%\config" +set "APP_JAR=%APP_DIR%\starter\seatunnel-spark-2-starter.jar" +set "APP_MAIN=org.apache.seatunnel.core.starter.spark.SparkStarter" + +if exist "%CONF_DIR%\seatunnel-env.cmd" ( + call "%CONF_DIR%\seatunnel-env.cmd" +) + +if "%~1"=="" ( + set "args=-h" +) else ( + set "args=%*" +) + +set "JAVA_OPTS=" +rem Log4j2 Config +if exist "%CONF_DIR%\log4j2.properties" ( + set "JAVA_OPTS=!JAVA_OPTS! -Dlog4j2.configurationFile=%CONF_DIR%\log4j2.properties" + set "JAVA_OPTS=!JAVA_OPTS! -Dseatunnel.logs.path=%APP_DIR%\logs" + set "JAVA_OPTS=!JAVA_OPTS! 
-Dseatunnel.logs.file_name=seatunnel-spark-starter" +) + +set "CLASS_PATH=%APP_DIR%\starter\logging\*;%APP_JAR%" + +for /f "delims=" %%i in ('java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%') do ( + set "CMD=%%i" + setlocal disabledelayedexpansion + if !errorlevel! equ 234 ( + echo !CMD! + endlocal + exit /b 0 + ) else if !errorlevel! equ 0 ( + echo Execute SeaTunnel Spark Job: !CMD! + endlocal + call !CMD! + ) else ( + echo !CMD! + endlocal + exit /b !errorlevel! + ) +) diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java index c187cceb6c48..5e295ef80450 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java @@ -80,7 +80,6 @@ private SparkStarter(String[] args, SparkCommandArgs commandArgs) { this.commandArgs = commandArgs; } - @SuppressWarnings("checkstyle:RegexpSingleline") public static void main(String[] args) throws IOException { SparkStarter starter = getInstance(args); List command = starter.buildCommands(); @@ -260,7 +259,6 @@ protected void appendAppJar(List commands) { Common.appStarterDir().resolve(EngineType.SPARK2.getStarterJarName()).toString()); } - @SuppressWarnings("checkstyle:Indentation") private List getPluginIdentifiers(Config config, PluginType... 
pluginTypes) { return Arrays.stream(pluginTypes) .flatMap( diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/main/bin/start-seatunnel-spark-3-connector-v2.cmd b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/main/bin/start-seatunnel-spark-3-connector-v2.cmd new file mode 100644 index 000000000000..433fe23c6d19 --- /dev/null +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/main/bin/start-seatunnel-spark-3-connector-v2.cmd @@ -0,0 +1,71 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +setlocal enabledelayedexpansion + +rem resolve links - %0 may be a softlink +set "PRG=%~f0" +:resolve_loop +rem Get the parent directory of the script +set "PRG_DIR=%~dp0" +rem Change current drive and directory to %PRG_DIR% and execute the 'dir' command, which will fail if %PRG% is not a valid file. +cd /d "%PRG_DIR%" || ( + echo Cannot determine the script's current directory. 
+ exit /b 1 +) + +set "APP_DIR=%~dp0" +set "CONF_DIR=%APP_DIR%\config" +set "APP_JAR=%APP_DIR%\starter\seatunnel-spark-3-starter.jar" +set "APP_MAIN=org.apache.seatunnel.core.starter.spark.SparkStarter" + +if exist "%CONF_DIR%\seatunnel-env.cmd" ( + call "%CONF_DIR%\seatunnel-env.cmd" +) + +if "%~1"=="" ( + set "args=-h" +) else ( + set "args=%*" +) + +set "JAVA_OPTS=" +rem Log4j2 Config +if exist "%CONF_DIR%\log4j2.properties" ( + set "JAVA_OPTS=!JAVA_OPTS! -Dlog4j2.configurationFile=%CONF_DIR%\log4j2.properties" + set "JAVA_OPTS=!JAVA_OPTS! -Dseatunnel.logs.path=%APP_DIR%\logs" + set "JAVA_OPTS=!JAVA_OPTS! -Dseatunnel.logs.file_name=seatunnel-spark-starter" +) + +set "CLASS_PATH=%APP_DIR%\starter\logging\*;%APP_JAR%" + +for /f "delims=" %%i in ('java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args%') do ( + set "CMD=%%i" + setlocal disabledelayedexpansion + if !errorlevel! equ 234 ( + echo !CMD! + endlocal + exit /b 0 + ) else if !errorlevel! equ 0 ( + echo Execute SeaTunnel Spark Job: !CMD! + endlocal + call !CMD! + ) else ( + echo !CMD! + endlocal + exit /b !errorlevel! 
+ ) +) diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java index 16a3dacad2e6..aa07f4ecdedd 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/SparkStarter.java @@ -80,7 +80,6 @@ private SparkStarter(String[] args, SparkCommandArgs commandArgs) { this.commandArgs = commandArgs; } - @SuppressWarnings("checkstyle:RegexpSingleline") public static void main(String[] args) throws IOException { SparkStarter starter = getInstance(args); List command = starter.buildCommands(); @@ -260,7 +259,6 @@ protected void appendAppJar(List commands) { Common.appStarterDir().resolve(EngineType.SPARK3.getStarterJarName()).toString()); } - @SuppressWarnings("checkstyle:Indentation") private List getPluginIdentifiers(Config config, PluginType... pluginTypes) { return Arrays.stream(pluginTypes) .flatMap( diff --git a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.cmd b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.cmd new file mode 100644 index 000000000000..e94a4bb482fa --- /dev/null +++ b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.cmd @@ -0,0 +1,86 @@ +@echo off +REM Licensed to the Apache Software Foundation (ASF) under one or more +REM contributor license agreements. See the NOTICE file distributed with +REM this work for additional information regarding copyright ownership. +REM The ASF licenses this file to You under the Apache License, Version 2.0 +REM (the "License"); you may not use this file except in compliance with +REM the License. 
You may obtain a copy of the License at +REM +REM http://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, software +REM distributed under the License is distributed on an "AS IS" BASIS, +REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +REM See the License for the specific language governing permissions and +REM limitations under the License. + +setlocal enabledelayedexpansion + +REM resolve links - %0 may be a softlink +for %%F in ("%~f0") do ( + set "PRG=%%~fF" + set "PRG_DIR=%%~dpF" + set "APP_DIR=%%~dpF.." +) + +set "CONF_DIR=%APP_DIR%\config" +set "APP_JAR=%APP_DIR%\starter\seatunnel-starter.jar" +set "APP_MAIN=org.apache.seatunnel.core.starter.seatunnel.SeaTunnelServer" +set "OUT=%APP_DIR%\logs\seatunnel-server.out" + +set "HELP=false" +set "args=" + +for %%I in (%*) do ( + set "args=!args! %%I" + if "%%I"=="-d" set "DAEMON=true" + if "%%I"=="--daemon" set "DAEMON=true" + if "%%I"=="-h" set "HELP=true" + if "%%I"=="--help" set "HELP=true" +) + +REM SeaTunnel Engine Config +set "HAZELCAST_CONFIG=%CONF_DIR%\hazelcast.yaml" +set "SEATUNNEL_CONFIG=%CONF_DIR%\seatunnel.yaml" +set "JAVA_OPTS=%JvmOption%" + +for %%I in (%*) do ( + set "arg=%%I" + if "!arg:~0,10!"=="JvmOption=" ( + set "JAVA_OPTS=%JAVA_OPTS% !arg:~10!" + ) +) + +set "JAVA_OPTS=%JAVA_OPTS% -Dseatunnel.config=%SEATUNNEL_CONFIG%" +set "JAVA_OPTS=%JAVA_OPTS% -Dhazelcast.config=%HAZELCAST_CONFIG%" +set "JAVA_OPTS=%JAVA_OPTS% -Dlog4j2.contextSelector=org.apache.logging.log4j.core.async.AsyncLoggerContextSelector" + +REM Server Debug Config +REM Usage instructions: +REM If you need to debug your code in cluster mode, please enable this configuration option and listen to the specified +REM port in your IDE. After that, you can happily debug your code. 
+REM set "JAVA_OPTS=%JAVA_OPTS% -Xdebug -Xrunjdwp:server=y,transport=dt_socket,address=5001,suspend=y" + +if exist "%CONF_DIR%\log4j2.properties" ( + set "JAVA_OPTS=%JAVA_OPTS% -Dlog4j2.configurationFile=%CONF_DIR%\log4j2.properties" + set "JAVA_OPTS=%JAVA_OPTS% -Dseatunnel.logs.path=%APP_DIR%\logs" + set "JAVA_OPTS=%JAVA_OPTS% -Dseatunnel.logs.file_name=seatunnel-engine-server" +) + +set "CLASS_PATH=%APP_DIR%\lib\*;%APP_JAR%" + +for /f "usebackq delims=" %%I in ("%APP_DIR%\config\jvm_options") do ( + set "line=%%I" + if not "!line:~0,1!"=="#" if "!line!" NEQ "" ( + set "JAVA_OPTS=!JAVA_OPTS! !line!" + ) +) + +if "%HELP%"=="false" ( + if not exist "%APP_DIR%\logs\" mkdir "%APP_DIR%\logs" + start "SeaTunnel Server" java %JAVA_OPTS% -cp "%CLASS_PATH%" %APP_MAIN% %args% > "%OUT%" 2>&1 +) else ( + java %JAVA_OPTS% -cp "%CLASS_PATH%" %APP_MAIN% %args% +) + +endlocal \ No newline at end of file diff --git a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.sh b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.sh index 919b545bbf13..e85a97e67a4b 100755 --- a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.sh +++ b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.sh @@ -93,7 +93,7 @@ fi # Usage instructions: # If you need to debug your code in cluster mode, please enable this configuration option and listen to the specified # port in your IDE. After that, you can happily debug your code. -# JAVA_OPTS="${JAVA_OPTS} -Xrunjdwp:server=y,transport=dt_socket,address=8000,suspend=n" +# JAVA_OPTS="${JAVA_OPTS} -Xdebug -Xrunjdwp:server=y,transport=dt_socket,address=5001,suspend=y" CLASS_PATH=${APP_DIR}/lib/*:${APP_JAR} @@ -105,9 +105,12 @@ do done < ${APP_DIR}/config/jvm_options if [[ $DAEMON == true && $HELP == false ]]; then - touch $OUT - nohup java ${JAVA_OPTS} -cp ${CLASS_PATH} ${APP_MAIN} ${args} > "$OUT" 200<&- 2>&1 < /dev/null & - else - java ${JAVA_OPTS} -cp ${CLASS_PATH} ${APP_MAIN} ${args} + if [[ ! 
-d ${APP_DIR}/logs ]]; then + mkdir -p ${APP_DIR}/logs + fi + touch $OUT + nohup java ${JAVA_OPTS} -cp ${CLASS_PATH} ${APP_MAIN} ${args} > "$OUT" 200<&- 2>&1 < /dev/null & + else + java ${JAVA_OPTS} -cp ${CLASS_PATH} ${APP_MAIN} ${args} fi diff --git a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.cmd b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.cmd new file mode 100644 index 000000000000..cf9258e9d50b --- /dev/null +++ b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.cmd @@ -0,0 +1,108 @@ +@echo off +REM Licensed to the Apache Software Foundation (ASF) under one or more +REM contributor license agreements. See the NOTICE file distributed with +REM this work for additional information regarding copyright ownership. +REM The ASF licenses this file to You under the Apache License, Version 2.0 +REM (the "License"); you may not use this file except in compliance with +REM the License. You may obtain a copy of the License at +REM +REM http://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, software +REM distributed under the License is distributed on an "AS IS" BASIS, +REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +REM See the License for the specific language governing permissions and +REM limitations under the License. + +setlocal enabledelayedexpansion +REM resolve links - %0 may be a softlink +set "PRG=%~0" + +:resolveLoop +for %%F in ("%PRG%") do ( + set "PRG_DIR=%%~dpF" + set "PRG_NAME=%%~nxF" +) +set "PRG=%PRG_DIR%%PRG_NAME%" + +REM Get application directory +cd "%PRG_DIR%\.." 
+set "APP_DIR=%CD%" + +set "CONF_DIR=%APP_DIR%\config" +set "APP_JAR=%APP_DIR%\starter\seatunnel-starter.jar" +set "APP_MAIN=org.apache.seatunnel.core.starter.seatunnel.SeaTunnelClient" + +if exist "%CONF_DIR%\seatunnel-env.cmd" call "%CONF_DIR%\seatunnel-env.cmd" + +if "%~1"=="" ( + set "args=-h" +) else ( + set "args=%*" +) + +REM SeaTunnel Engine Config +if not defined HAZELCAST_CLIENT_CONFIG ( + set "HAZELCAST_CLIENT_CONFIG=%CONF_DIR%\hazelcast-client.yaml" +) + +if not defined HAZELCAST_CONFIG ( + set "HAZELCAST_CONFIG=%CONF_DIR%\hazelcast.yaml" +) + +if not defined SEATUNNEL_CONFIG ( + set "SEATUNNEL_CONFIG=%CONF_DIR%\seatunnel.yaml" +) + +if defined JvmOption ( + set "JAVA_OPTS=%JAVA_OPTS% %JvmOption%" +) + +for %%i in (%*) do ( + set "arg=%%i" + if "!arg:~0,9!"=="JvmOption" ( + set "JVM_OPTION=!arg:~9!" + set "JAVA_OPTS=!JAVA_OPTS! !JVM_OPTION!" + goto :break_loop + ) +) +:break_loop + +set "JAVA_OPTS=%JAVA_OPTS% -Dhazelcast.client.config=%HAZELCAST_CLIENT_CONFIG%" +set "JAVA_OPTS=%JAVA_OPTS% -Dseatunnel.config=%SEATUNNEL_CONFIG%" +set "JAVA_OPTS=%JAVA_OPTS% -Dhazelcast.config=%HAZELCAST_CONFIG%" + +REM if you want to debug, please +REM set "JAVA_OPTS=%JAVA_OPTS% -Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=5000,suspend=y" + +REM Log4j2 Config +if exist "%CONF_DIR%\log4j2_client.properties" ( + set "JAVA_OPTS=%JAVA_OPTS% -Dlog4j2.configurationFile=%CONF_DIR%\log4j2_client.properties" + set "JAVA_OPTS=%JAVA_OPTS% -Dseatunnel.logs.path=%APP_DIR%\logs" + for %%i in (%args%) do ( + set "arg=%%i" + if "!arg!"=="-m" set "is_local_mode=true" + if "!arg!"=="--master" set "is_local_mode=true" + if "!arg!"=="-e" set "is_local_mode=true" + if "!arg!"=="--deploy-mode" set "is_local_mode=true" + ) + if defined is_local_mode ( + for /f "tokens=1-3 delims=:" %%A in ('echo %time%') do ( + set "ntime=%%A%%B%%C" + ) + set "JAVA_OPTS=%JAVA_OPTS% 
-Dseatunnel.logs.file_name=seatunnel-starter-client-!date:~0,4!!date:~5,2!!date:~8,2!-!time:~0,2!!time:~3,2!!time:~6,2!!ntime!" + ) else ( + set "JAVA_OPTS=%JAVA_OPTS% -Dseatunnel.logs.file_name=seatunnel-starter-client" + ) +) + +set "CLASS_PATH=%APP_DIR%\lib\*;%APP_JAR%" + +for /f "usebackq delims=" %%a in ("%APP_DIR%\config\jvm_client_options") do ( + set "line=%%a" + if not "!line:~0,1!"=="#" if "!line!" neq "" ( + set "JAVA_OPTS=!JAVA_OPTS! !line!" + ) +) + +java %JAVA_OPTS% -cp %CLASS_PATH% %APP_MAIN% %args% diff --git a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.sh b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.sh index 7c25ec126c0e..b95800f1c2c2 100755 --- a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.sh +++ b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.sh @@ -81,6 +81,12 @@ JAVA_OPTS="${JAVA_OPTS} -Dhazelcast.client.config=${HAZELCAST_CLIENT_CONFIG}" JAVA_OPTS="${JAVA_OPTS} -Dseatunnel.config=${SEATUNNEL_CONFIG}" JAVA_OPTS="${JAVA_OPTS} -Dhazelcast.config=${HAZELCAST_CONFIG}" +# Client Debug Config +# Usage instructions: +# If you need to debug your code in cluster mode, please enable this configuration option and listen to the specified +# port in your IDE. After that, you can happily debug your code. +# JAVA_OPTS="${JAVA_OPTS} -Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=5000,suspend=y" + # Log4j2 Config if [ -e "${CONF_DIR}/log4j2_client.properties" ]; then JAVA_OPTS="${JAVA_OPTS} -Dlog4j2.configurationFile=${CONF_DIR}/log4j2_client.properties" diff --git a/seatunnel-core/seatunnel-starter/src/main/bin/stop-seatunnel-cluster.cmd b/seatunnel-core/seatunnel-starter/src/main/bin/stop-seatunnel-cluster.cmd new file mode 100644 index 000000000000..0c0cb72b0144 --- /dev/null +++ b/seatunnel-core/seatunnel-starter/src/main/bin/stop-seatunnel-cluster.cmd @@ -0,0 +1,58 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. 
See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +setlocal enabledelayedexpansion + +set "SEATUNNEL_DEFAULT_CLUSTER_NAME=seatunnel_default_cluster" +set "SHOW_USAGE=Usage: stop-seatunnel-cluster.bat \n Options: \n -cn, --cluster The name of the cluster to shut down (default: $SEATUNNEL_DEFAULT_CLUSTER_NAME) \n -h, --help Show the usage message" +set "APP_MAIN=org.apache.seatunnel.core.starter.seatunnel.SeaTunnelServer" +set "CLUSTER_NAME=" + +if "%~1"=="" ( + echo !SHOW_USAGE! + exit /B 1 +) + +:parse_args +if "%~1"=="-cn" ( + shift + set "CLUSTER_NAME=%~1" + shift + goto :parse_args +) else if "%~1"=="--cluster" ( + shift + set "CLUSTER_NAME=%~1" + shift + goto :parse_args +) else if "%~1"=="-h" ( + echo !SHOW_USAGE! + exit /B 0 +) else if "%~1"=="--help" ( + echo !SHOW_USAGE! + exit /B 0 +) + +if not defined CLUSTER_NAME ( + for /f %%i in ('tasklist /fi "imagename eq java.exe" ^| find "!APP_MAIN!"') do ( + taskkill /F /PID %%i + ) +) else ( + for /f %%i in ('tasklist /fi "imagename eq java.exe" ^| find "!APP_MAIN!" 
^| find "!CLUSTER_NAME!"') do ( + taskkill /F /PID %%i + ) +) + +exit /B 0 \ No newline at end of file diff --git a/seatunnel-core/seatunnel-starter/src/main/java/org/apache/seatunnel/core/starter/seatunnel/command/ClientExecuteCommand.java b/seatunnel-core/seatunnel-starter/src/main/java/org/apache/seatunnel/core/starter/seatunnel/command/ClientExecuteCommand.java index 14b00540f228..449b3f5238c0 100644 --- a/seatunnel-core/seatunnel-starter/src/main/java/org/apache/seatunnel/core/starter/seatunnel/command/ClientExecuteCommand.java +++ b/seatunnel-core/seatunnel-starter/src/main/java/org/apache/seatunnel/core/starter/seatunnel/command/ClientExecuteCommand.java @@ -69,7 +69,6 @@ public ClientExecuteCommand(ClientCommandArgs clientCommandArgs) { this.clientCommandArgs = clientCommandArgs; } - @SuppressWarnings({"checkstyle:RegexpSingleline", "checkstyle:MagicNumber"}) @Override public void execute() throws CommandExecuteException { JobMetricsRunner.JobMetricsSummary jobMetricsSummary = null; @@ -240,7 +239,6 @@ private HazelcastInstance createServerInLocal( new SeaTunnelNodeContext(seaTunnelConfig)); } - @SuppressWarnings("checkstyle:MagicNumber") private String creatRandomClusterName(String namePrefix) { Random random = new Random(); return namePrefix + "-" + random.nextInt(1000000); diff --git a/seatunnel-core/seatunnel-starter/src/test/java/org/apache/seatunnel/core/starter/seatunnel/args/ClientCommandArgsTest.java b/seatunnel-core/seatunnel-starter/src/test/java/org/apache/seatunnel/core/starter/seatunnel/args/ClientCommandArgsTest.java index 5f197367d0d3..c4bd422f2f10 100644 --- a/seatunnel-core/seatunnel-starter/src/test/java/org/apache/seatunnel/core/starter/seatunnel/args/ClientCommandArgsTest.java +++ b/seatunnel-core/seatunnel-starter/src/test/java/org/apache/seatunnel/core/starter/seatunnel/args/ClientCommandArgsTest.java @@ -40,6 +40,7 @@ public void testUserDefinedParamsCommand() throws URISyntaxException { String password = "dsjr42=4wfskahdsd=w1chh"; 
String fakeSourceTable = "fake"; String fakeSinkTable = "sink"; + String list = "[par1=20230829,par2=20230829]"; String[] args = { "-c", "/args/user_defined_params.conf", @@ -54,7 +55,9 @@ public void testUserDefinedParamsCommand() throws URISyntaxException { "-i", "password=" + password, "-i", - "username=" + username + "username=" + username, + "-i", + "list=" + list, }; ClientCommandArgs clientCommandArgs = CommandLineUtils.parse(args, new ClientCommandArgs(), "seatunnel-zeta", true); @@ -88,6 +91,9 @@ public void testUserDefinedParamsCommand() throws URISyntaxException { Assertions.assertEquals(sinkConfig.getString("username"), username); Assertions.assertEquals(sinkConfig.getString("password"), password); + List list1 = sinkConfig.getStringList("list"); + Assertions.assertEquals(list1.get(0), "par1=20230829"); + Assertions.assertEquals(list1.get(1), "par2=20230829"); } } } diff --git a/seatunnel-core/seatunnel-starter/src/test/resources/args/user_defined_params.conf b/seatunnel-core/seatunnel-starter/src/test/resources/args/user_defined_params.conf index 9dfde35dd6a9..bc2114443f17 100644 --- a/seatunnel-core/seatunnel-starter/src/test/resources/args/user_defined_params.conf +++ b/seatunnel-core/seatunnel-starter/src/test/resources/args/user_defined_params.conf @@ -47,5 +47,6 @@ sink { result_table_name = ${fake_sink_table} username = ${username} password = ${password} + list = ${list} } -} \ No newline at end of file +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/MysqlCDCIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/MysqlCDCIT.java index 1d0d90853fc5..b648febd7d95 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/MysqlCDCIT.java +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/MysqlCDCIT.java @@ -55,7 +55,7 @@ @Slf4j @DisabledOnContainer( value = {}, - type = {EngineType.SPARK, EngineType.FLINK}, + type = {EngineType.SPARK}, disabledReason = "Currently SPARK and FLINK do not support cdc") public class MysqlCDCIT extends TestSuiteBase implements TestResource { @@ -88,6 +88,9 @@ public class MysqlCDCIT extends TestSuiteBase implements TestResource { + " f_enum, cast(f_mediumblob as char) as f_mediumblob, f_long_varchar, f_real, f_time, f_tinyint, f_tinyint_unsigned," + " f_json, cast(f_year as year) from mysql_cdc_e2e_sink_table"; + private static final String CLEAN_SOURCE = "truncate table mysql_cdc_e2e_source_table"; + private static final String CLEAN_SINK = "truncate table mysql_cdc_e2e_sink_table"; + private static MySqlContainer createMySqlContainer(MySqlVersion version) { MySqlContainer mySqlContainer = new MySqlContainer(version) @@ -134,6 +137,9 @@ public void startUp() throws ClassNotFoundException, InterruptedException { @TestTemplate public void testMysqlCdcCheckDataE2e(TestContainer container) throws IOException, InterruptedException { + // Clear related content to ensure that multiple operations are not affected + executeSql(CLEAN_SOURCE); + executeSql(CLEAN_SINK); CompletableFuture executeJobFuture = CompletableFuture.supplyAsync( diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-sqlserver-e2e/src/test/java/org/apache/seatunnel/e2e/connector/cdc/sqlserver/SqlServerCDCIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-sqlserver-e2e/src/test/java/org/apache/seatunnel/e2e/connector/cdc/sqlserver/SqlServerCDCIT.java index 8bca3e3b0369..bfe2a3588894 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-sqlserver-e2e/src/test/java/org/apache/seatunnel/e2e/connector/cdc/sqlserver/SqlServerCDCIT.java +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-sqlserver-e2e/src/test/java/org/apache/seatunnel/e2e/connector/cdc/sqlserver/SqlServerCDCIT.java @@ -65,7 +65,7 @@ @Slf4j @DisabledOnContainer( value = {}, - type = {EngineType.SPARK, EngineType.FLINK}, + type = {EngineType.SPARK}, disabledReason = "Currently SPARK and FLINK do not support cdc") public class SqlServerCDCIT extends TestSuiteBase implements TestResource { diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iotdb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iotdb/IoTDBIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iotdb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iotdb/IoTDBIT.java index 94bfbe917e27..8b8d6acd77b6 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iotdb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iotdb/IoTDBIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iotdb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iotdb/IoTDBIT.java @@ -63,7 +63,7 @@ value = {}, type = {EngineType.SPARK}, disabledReason = - "There is a conflict of thrift version between IoTDB and Spark.Therefore. Refactor starter module, so disabled in flink") + "There is a conflict of thrift version between IoTDB and Spark.Therefore. 
Refactor starter module, so disabled in spark") public class IoTDBIT extends TestSuiteBase implements TestResource { private static final String IOTDB_DOCKER_IMAGE = "apache/iotdb:0.13.1-node"; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java index 6528be0e1fca..a38fb2217f29 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java @@ -17,6 +17,9 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; import org.apache.seatunnel.common.utils.ExceptionUtils; @@ -31,6 +34,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestTemplate; import org.testcontainers.containers.Container; import org.testcontainers.containers.GenericContainer; @@ -76,6 +80,7 @@ public abstract class AbstractJdbcIT extends TestSuiteBase implements TestResour protected GenericContainer dbServer; protected JdbcCase jdbcCase; protected Connection connection; + protected Catalog catalog; abstract JdbcCase getJdbcCase(); @@ -141,12 +146,16 @@ protected void createNeededTables() { String.format( createTemplate, 
buildTableInfoWithSchema( - jdbcCase.getDatabase(), jdbcCase.getSourceTable())); + jdbcCase.getDatabase(), + jdbcCase.getSchema(), + jdbcCase.getSourceTable())); String createSink = String.format( createTemplate, buildTableInfoWithSchema( - jdbcCase.getDatabase(), jdbcCase.getSinkTable())); + jdbcCase.getDatabase(), + jdbcCase.getSchema(), + jdbcCase.getSinkTable())); statement.execute(createSource); statement.execute(createSink); @@ -173,6 +182,14 @@ public String insertTable(String schema, String table, String... fields) { + ")"; } + protected void clearTable(String database, String schema, String table) { + clearTable(database, table); + } + + protected String buildTableInfoWithSchema(String database, String schema, String table) { + return buildTableInfoWithSchema(database, table); + } + public void clearTable(String schema, String table) { try (Statement statement = connection.createStatement()) { statement.execute("TRUNCATE TABLE " + buildTableInfoWithSchema(schema, table)); @@ -215,6 +232,7 @@ public void startUp() { createSchemaIfNeeded(); createNeededTables(); insertTestData(); + initCatalog(); } @Override @@ -226,6 +244,10 @@ public void tearDown() throws SQLException { if (connection != null) { connection.close(); } + + if (catalog != null) { + catalog.close(); + } } @TestTemplate @@ -238,6 +260,43 @@ public void testJdbcDb(TestContainer container) } compareResult(); - clearTable(jdbcCase.getDatabase(), jdbcCase.getSinkTable()); + clearTable(jdbcCase.getDatabase(), jdbcCase.getSchema(), jdbcCase.getSinkTable()); + } + + protected void initCatalog() {} + + @Test + public void testCatalog() { + if (catalog == null) { + return; + } + + TablePath sourceTablePath = + new TablePath( + jdbcCase.getDatabase(), jdbcCase.getSchema(), jdbcCase.getSourceTable()); + TablePath targetTablePath = + new TablePath( + jdbcCase.getCatalogDatabase(), + jdbcCase.getCatalogSchema(), + jdbcCase.getCatalogTable()); + boolean createdDb = false; + + if 
(!catalog.databaseExists(targetTablePath.getDatabaseName())) { + catalog.createDatabase(targetTablePath, false); + Assertions.assertTrue(catalog.databaseExists(targetTablePath.getDatabaseName())); + createdDb = true; + } + + CatalogTable catalogTable = catalog.getTable(sourceTablePath); + catalog.createTable(targetTablePath, catalogTable, false); + Assertions.assertTrue(catalog.tableExists(targetTablePath)); + + catalog.dropTable(targetTablePath, false); + Assertions.assertFalse(catalog.tableExists(targetTablePath)); + + if (createdDb) { + catalog.dropDatabase(targetTablePath, false); + Assertions.assertFalse(catalog.databaseExists(targetTablePath.getDatabaseName())); + } } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcCase.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcCase.java index 805fcbd16bb9..5f17eacc51ad 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcCase.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcCase.java @@ -41,6 +41,7 @@ public class JdbcCase { private int port; private int localPort; private String database; + private String schema; private String sourceTable; private String sinkTable; private String jdbcTemplate; @@ -50,4 +51,8 @@ public class JdbcCase { private List configFile; private Pair> testData; private Map containerEnv; + + private String catalogDatabase; + private String catalogSchema; + private String catalogTable; } diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java index f4b1338b15b5..b10aa0c2225f 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java @@ -19,6 +19,8 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc; import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.common.utils.JdbcUrlUtil; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql.MySqlCatalog; import org.apache.commons.lang3.tuple.Pair; @@ -48,6 +50,7 @@ public class JdbcMysqlIT extends AbstractJdbcIT { private static final String MYSQL_DATABASE = "seatunnel"; private static final String MYSQL_SOURCE = "source"; private static final String MYSQL_SINK = "sink"; + private static final String CATALOG_DATABASE = "catalog_database"; private static final String MYSQL_USERNAME = "root"; private static final String MYSQL_PASSWORD = "Abc!@#135_seatunnel"; @@ -138,6 +141,8 @@ JdbcCase getJdbcCase() { .configFile(CONFIG_FILE) .insertSql(insertSql) .testData(testDataSet) + .catalogDatabase(CATALOG_DATABASE) + .catalogTable(MYSQL_SINK) .build(); } @@ -282,4 +287,16 @@ protected GenericContainer initContainer() { return container; } + + @Override + protected void initCatalog() { + catalog = + new MySqlCatalog( + "mysql", + jdbcCase.getUserName(), + jdbcCase.getPassword(), + JdbcUrlUtil.getUrlInfo( + jdbcCase.getJdbcUrl().replace(HOST, dbServer.getHost()))); + catalog.open(); + } } diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java index d0f8ce3b6879..75bdffbd6cad 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java @@ -19,6 +19,8 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc; import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle.OracleCatalog; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle.OracleURLParser; import org.apache.commons.lang3.tuple.Pair; @@ -27,6 +29,7 @@ import org.testcontainers.containers.output.Slf4jLogConsumer; import org.testcontainers.utility.DockerImageName; import org.testcontainers.utility.DockerLoggerFactory; +import org.testcontainers.utility.MountableFile; import com.google.common.collect.Lists; @@ -47,11 +50,13 @@ public class JdbcOracleIT extends AbstractJdbcIT { private static final String DRIVER_CLASS = "oracle.jdbc.OracleDriver"; private static final int ORACLE_PORT = 1521; private static final String ORACLE_URL = "jdbc:oracle:thin:@" + HOST + ":%s/%s"; - private static final String USERNAME = "testUser"; + private static final String USERNAME = "TESTUSER"; private static final String PASSWORD = "testPassword"; - private static final String DATABASE = "TESTUSER"; + private static final String DATABASE = "XE"; + private static final String SCHEMA = USERNAME; private static final String SOURCE_TABLE = "E2E_TABLE_SOURCE"; private static 
final String SINK_TABLE = "E2E_TABLE_SINK"; + private static final String CATALOG_TABLE = "E2E_TABLE_CATALOG"; private static final List CONFIG_FILE = Lists.newArrayList("/jdbc_oracle_source_to_sink.conf"); @@ -78,11 +83,11 @@ JdbcCase getJdbcCase() { containerEnv.put("ORACLE_PASSWORD", PASSWORD); containerEnv.put("APP_USER", USERNAME); containerEnv.put("APP_USER_PASSWORD", PASSWORD); - String jdbcUrl = String.format(ORACLE_URL, ORACLE_PORT, DATABASE); + String jdbcUrl = String.format(ORACLE_URL, ORACLE_PORT, SCHEMA); Pair> testDataSet = initTestData(); String[] fieldNames = testDataSet.getKey(); - String insertSql = insertTable(DATABASE, SOURCE_TABLE, fieldNames); + String insertSql = insertTable(SCHEMA, SOURCE_TABLE, fieldNames); return JdbcCase.builder() .dockerImage(ORACLE_IMAGE) @@ -97,8 +102,12 @@ JdbcCase getJdbcCase() { .userName(USERNAME) .password(PASSWORD) .database(DATABASE) + .schema(SCHEMA) .sourceTable(SOURCE_TABLE) .sinkTable(SINK_TABLE) + .catalogDatabase(DATABASE) + .catalogSchema(SCHEMA) + .catalogTable(CATALOG_TABLE) .createSql(CREATE_SQL) .configFile(CONFIG_FILE) .insertSql(insertSql) @@ -162,9 +171,10 @@ GenericContainer initContainer() { GenericContainer container = new OracleContainer(imageName) - .withDatabaseName(DATABASE) - .withUsername(USERNAME) - .withPassword(PASSWORD) + .withDatabaseName(SCHEMA) + .withCopyFileToContainer( + MountableFile.forClasspathResource("sql/oracle_init.sql"), + "/container-entrypoint-startdb.d/init.sql") .withNetwork(NETWORK) .withNetworkAliases(ORACLE_NETWORK_ALIASES) .withExposedPorts(ORACLE_PORT) @@ -181,4 +191,27 @@ GenericContainer initContainer() { public String quoteIdentifier(String field) { return "\"" + field + "\""; } + + @Override + protected void clearTable(String database, String schema, String table) { + clearTable(schema, table); + } + + @Override + protected String buildTableInfoWithSchema(String database, String schema, String table) { + return buildTableInfoWithSchema(schema, table); + } + + 
@Override + protected void initCatalog() { + String jdbcUrl = jdbcCase.getJdbcUrl().replace(HOST, dbServer.getHost()); + catalog = + new OracleCatalog( + "oracle", + jdbcCase.getUserName(), + jdbcCase.getPassword(), + OracleURLParser.parse(jdbcUrl), + SCHEMA); + catalog.open(); + } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIdentifierIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIdentifierIT.java new file mode 100644 index 000000000000..13adec70084c --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIdentifierIT.java @@ -0,0 +1,387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc; + +import org.apache.seatunnel.e2e.common.TestResource; +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; +import org.apache.seatunnel.e2e.common.container.EngineType; +import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; +import org.testcontainers.containers.PostgreSQLContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.lifecycle.Startables; +import org.testcontainers.utility.DockerImageName; +import org.testcontainers.utility.DockerLoggerFactory; + +import com.google.common.collect.Lists; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + +import static org.awaitility.Awaitility.given; + +@Slf4j +@DisabledOnContainer( + value = {}, + type = {EngineType.SPARK, EngineType.FLINK}, + disabledReason = "Currently SPARK and FLINK do not support cdc") +public class JdbcPostgresIdentifierIT extends TestSuiteBase implements TestResource { + private static final String PG_IMAGE = "postgis/postgis"; + private static final String PG_DRIVER_JAR = + "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.3.3/postgresql-42.3.3.jar"; + private static final String PG_JDBC_JAR = + 
"https://repo1.maven.org/maven2/net/postgis/postgis-jdbc/2.5.1/postgis-jdbc-2.5.1.jar"; + private static final String PG_GEOMETRY_JAR = + "https://repo1.maven.org/maven2/net/postgis/postgis-geometry/2.5.1/postgis-geometry-2.5.1.jar"; + private static final List PG_CONFIG_FILE_LIST = + Lists.newArrayList("/jdbc_postgres_ide_source_and_sink.conf"); + private PostgreSQLContainer POSTGRESQL_CONTAINER; + private static final String PG_SOURCE_DDL = + "CREATE TABLE IF NOT EXISTS pg_ide_source_table (\n" + + " gid SERIAL PRIMARY KEY,\n" + + " text_col TEXT,\n" + + " varchar_col VARCHAR(255),\n" + + " char_col CHAR(10),\n" + + " boolean_col bool,\n" + + " smallint_col int2,\n" + + " integer_col int4,\n" + + " bigint_col BIGINT,\n" + + " decimal_col DECIMAL(10, 2),\n" + + " numeric_col NUMERIC(8, 4),\n" + + " real_col float4,\n" + + " double_precision_col float8,\n" + + " smallserial_col SMALLSERIAL,\n" + + " serial_col SERIAL,\n" + + " bigserial_col BIGSERIAL,\n" + + " date_col DATE,\n" + + " timestamp_col TIMESTAMP,\n" + + " bpchar_col BPCHAR(10),\n" + + " age INT NOT null,\n" + + " name VARCHAR(255) NOT null,\n" + + " point geometry(POINT, 4326),\n" + + " linestring geometry(LINESTRING, 4326),\n" + + " polygon_colums geometry(POLYGON, 4326),\n" + + " multipoint geometry(MULTIPOINT, 4326),\n" + + " multilinestring geometry(MULTILINESTRING, 4326),\n" + + " multipolygon geometry(MULTIPOLYGON, 4326),\n" + + " geometrycollection geometry(GEOMETRYCOLLECTION, 4326),\n" + + " geog geography(POINT, 4326)\n" + + ")"; + private static final String PG_SINK_DDL = + "CREATE TABLE IF NOT EXISTS test.public.\"PG_IDE_SINK_TABLE\" (\n" + + " \"GID\" SERIAL PRIMARY KEY,\n" + + " \"TEXT_COL\" TEXT,\n" + + " \"VARCHAR_COL\" VARCHAR(255),\n" + + " \"CHAR_COL\" CHAR(10),\n" + + " \"BOOLEAN_COL\" bool,\n" + + " \"SMALLINT_COL\" int2,\n" + + " \"INTEGER_COL\" int4,\n" + + " \"BIGINT_COL\" BIGINT,\n" + + " \"DECIMAL_COL\" DECIMAL(10, 2),\n" + + " \"NUMERIC_COL\" NUMERIC(8, 4),\n" + + " 
\"REAL_COL\" float4,\n" + + " \"DOUBLE_PRECISION_COL\" float8,\n" + + " \"SMALLSERIAL_COL\" SMALLSERIAL,\n" + + " \"SERIAL_COL\" SERIAL,\n" + + " \"BIGSERIAL_COL\" BIGSERIAL,\n" + + " \"DATE_COL\" DATE,\n" + + " \"TIMESTAMP_COL\" TIMESTAMP,\n" + + " \"BPCHAR_COL\" BPCHAR(10),\n" + + " \"AGE\" int4 NOT NULL,\n" + + " \"NAME\" varchar(255) NOT NULL,\n" + + " \"POINT\" varchar(2000) NULL,\n" + + " \"LINESTRING\" varchar(2000) NULL,\n" + + " \"POLYGON_COLUMS\" varchar(2000) NULL,\n" + + " \"MULTIPOINT\" varchar(2000) NULL,\n" + + " \"MULTILINESTRING\" varchar(2000) NULL,\n" + + " \"MULTIPOLYGON\" varchar(2000) NULL,\n" + + " \"GEOMETRYCOLLECTION\" varchar(2000) NULL,\n" + + " \"GEOG\" varchar(2000) NULL\n" + + " )"; + + private static final String SOURCE_SQL = + "select \n" + + "gid,\n" + + "text_col,\n" + + "varchar_col,\n" + + "char_col,\n" + + "boolean_col,\n" + + "smallint_col,\n" + + "integer_col,\n" + + "bigint_col,\n" + + "decimal_col,\n" + + "numeric_col,\n" + + "real_col,\n" + + "double_precision_col,\n" + + "smallserial_col,\n" + + "serial_col,\n" + + "bigserial_col,\n" + + "date_col,\n" + + "timestamp_col,\n" + + "bpchar_col,\n" + + "age,\n" + + "name,\n" + + "point,\n" + + "linestring,\n" + + "polygon_colums,\n" + + "multipoint,\n" + + "multilinestring,\n" + + "multipolygon,\n" + + "geometrycollection,\n" + + "geog\n" + + " from pg_ide_source_table"; + private static final String SINK_SQL = + "SELECT\n" + + " \"GID\",\n" + + " \"TEXT_COL\",\n" + + " \"VARCHAR_COL\",\n" + + " \"CHAR_COL\",\n" + + " \"BOOLEAN_COL\",\n" + + " \"SMALLINT_COL\",\n" + + " \"INTEGER_COL\",\n" + + " \"BIGINT_COL\",\n" + + " \"DECIMAL_COL\",\n" + + " \"NUMERIC_COL\",\n" + + " \"REAL_COL\",\n" + + " \"DOUBLE_PRECISION_COL\",\n" + + " \"SMALLSERIAL_COL\",\n" + + " \"SERIAL_COL\",\n" + + " \"BIGSERIAL_COL\",\n" + + " \"DATE_COL\",\n" + + " \"TIMESTAMP_COL\",\n" + + " \"BPCHAR_COL\",\n" + + " \"AGE\",\n" + + " \"NAME\",\n" + + " CAST(\"POINT\" AS GEOMETRY) AS POINT,\n" + + " 
CAST(\"LINESTRING\" AS GEOMETRY) AS LINESTRING,\n" + + " CAST(\"POLYGON_COLUMS\" AS GEOMETRY) AS POLYGON_COLUMS,\n" + + " CAST(\"MULTIPOINT\" AS GEOMETRY) AS MULTIPOINT,\n" + + " CAST(\"MULTILINESTRING\" AS GEOMETRY) AS MULTILINESTRING,\n" + + " CAST(\"MULTIPOLYGON\" AS GEOMETRY) AS MULTIPOLYGON,\n" + + " CAST(\"GEOMETRYCOLLECTION\" AS GEOMETRY) AS GEOMETRYCOLLECTION,\n" + + " CAST(\"GEOG\" AS GEOGRAPHY) AS GEOG\n" + + "FROM\n" + + " \"PG_IDE_SINK_TABLE\";"; + + @TestContainerExtension + private final ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/Jdbc/lib && cd /tmp/seatunnel/plugins/Jdbc/lib && curl -O " + + PG_DRIVER_JAR + + " && curl -O " + + PG_JDBC_JAR + + " && curl -O " + + PG_GEOMETRY_JAR); + Assertions.assertEquals(0, extraCommands.getExitCode()); + }; + + @BeforeAll + @Override + public void startUp() throws Exception { + POSTGRESQL_CONTAINER = + new PostgreSQLContainer<>( + DockerImageName.parse(PG_IMAGE) + .asCompatibleSubstituteFor("postgres")) + .withNetwork(TestSuiteBase.NETWORK) + .withNetworkAliases("postgresql") + .withCommand("postgres -c max_prepared_transactions=100") + .withLogConsumer( + new Slf4jLogConsumer(DockerLoggerFactory.getLogger(PG_IMAGE))); + Startables.deepStart(Stream.of(POSTGRESQL_CONTAINER)).join(); + log.info("PostgreSQL container started"); + Class.forName(POSTGRESQL_CONTAINER.getDriverClassName()); + given().ignoreExceptions() + .await() + .atLeast(100, TimeUnit.MILLISECONDS) + .pollInterval(500, TimeUnit.MILLISECONDS) + .atMost(2, TimeUnit.MINUTES) + .untilAsserted(this::initializeJdbcTable); + log.info("pg data initialization succeeded. 
Procedure"); + } + + @TestTemplate + public void testAutoGenerateSQL(TestContainer container) + throws IOException, InterruptedException { + for (String CONFIG_FILE : PG_CONFIG_FILE_LIST) { + Container.ExecResult execResult = container.executeJob(CONFIG_FILE); + Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertIterableEquals(querySql(SOURCE_SQL), querySql(SINK_SQL)); + executeSQL("truncate table \"PG_IDE_SINK_TABLE\""); + log.info(CONFIG_FILE + " e2e test completed"); + } + } + + private void initializeJdbcTable() { + try (Connection connection = getJdbcConnection()) { + Statement statement = connection.createStatement(); + statement.execute(PG_SOURCE_DDL); + statement.execute(PG_SINK_DDL); + for (int i = 1; i <= 10; i++) { + statement.addBatch( + "INSERT INTO\n" + + " pg_ide_source_table (gid,\n" + + " text_col,\n" + + " varchar_col,\n" + + " char_col,\n" + + " boolean_col,\n" + + " smallint_col,\n" + + " integer_col,\n" + + " bigint_col,\n" + + " decimal_col,\n" + + " numeric_col,\n" + + " real_col,\n" + + " double_precision_col,\n" + + " smallserial_col,\n" + + " serial_col,\n" + + " bigserial_col,\n" + + " date_col,\n" + + " timestamp_col,\n" + + " bpchar_col,\n" + + " age,\n" + + " name,\n" + + " point,\n" + + " linestring,\n" + + " polygon_colums,\n" + + " multipoint,\n" + + " multilinestring,\n" + + " multipolygon,\n" + + " geometrycollection,\n" + + " geog\n" + + " )\n" + + "VALUES\n" + + " (\n" + + " '" + + i + + "',\n" + + " 'Hello World',\n" + + " 'Test',\n" + + " 'Testing',\n" + + " true,\n" + + " 10,\n" + + " 100,\n" + + " 1000,\n" + + " 10.55,\n" + + " 8.8888,\n" + + " 3.14,\n" + + " 3.14159265,\n" + + " 1,\n" + + " 100,\n" + + " 10000,\n" + + " '2023-05-07',\n" + + " '2023-05-07 14:30:00',\n" + + " 'Testing',\n" + + " 21,\n" + + " 'Leblanc',\n" + + " ST_GeomFromText('POINT(-122.3452 47.5925)', 4326),\n" + + " ST_GeomFromText(\n" + + " 'LINESTRING(-122.3451 47.5924, -122.3449 47.5923)',\n" + + " 4326\n" + + " ),\n" + + " 
ST_GeomFromText(\n" + + " 'POLYGON((-122.3453 47.5922, -122.3453 47.5926, -122.3448 47.5926, -122.3448 47.5922, -122.3453 47.5922))',\n" + + " 4326\n" + + " ),\n" + + " ST_GeomFromText(\n" + + " 'MULTIPOINT(-122.3459 47.5927, -122.3445 47.5918)',\n" + + " 4326\n" + + " ),\n" + + " ST_GeomFromText(\n" + + " 'MULTILINESTRING((-122.3463 47.5920, -122.3461 47.5919),(-122.3459 47.5924, -122.3457 47.5923))',\n" + + " 4326\n" + + " ),\n" + + " ST_GeomFromText(\n" + + " 'MULTIPOLYGON(((-122.3458 47.5925, -122.3458 47.5928, -122.3454 47.5928, -122.3454 47.5925, -122.3458 47.5925)),((-122.3453 47.5921, -122.3453 47.5924, -122.3448 47.5924, -122.3448 47.5921, -122.3453 47.5921)))',\n" + + " 4326\n" + + " ),\n" + + " ST_GeomFromText(\n" + + " 'GEOMETRYCOLLECTION(POINT(-122.3462 47.5921), LINESTRING(-122.3460 47.5924, -122.3457 47.5924))',\n" + + " 4326\n" + + " ),\n" + + " ST_GeographyFromText('POINT(-122.3452 47.5925)')\n" + + " )"); + } + + statement.executeBatch(); + } catch (SQLException e) { + throw new RuntimeException("Initializing PostgreSql table failed!", e); + } + } + + private Connection getJdbcConnection() throws SQLException { + return DriverManager.getConnection( + POSTGRESQL_CONTAINER.getJdbcUrl(), + POSTGRESQL_CONTAINER.getUsername(), + POSTGRESQL_CONTAINER.getPassword()); + } + + private List> querySql(String sql) { + try (Connection connection = getJdbcConnection()) { + ResultSet resultSet = connection.createStatement().executeQuery(sql); + List> result = new ArrayList<>(); + int columnCount = resultSet.getMetaData().getColumnCount(); + while (resultSet.next()) { + ArrayList objects = new ArrayList<>(); + for (int i = 1; i <= columnCount; i++) { + objects.add(resultSet.getObject(i)); + } + result.add(objects); + } + return result; + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + private void executeSQL(String sql) { + try (Connection connection = getJdbcConnection()) { + Statement statement = connection.createStatement(); + 
statement.execute(sql); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + @AfterAll + @Override + public void tearDown() { + if (POSTGRESQL_CONTAINER != null) { + POSTGRESQL_CONTAINER.stop(); + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_postgres_ide_source_and_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_postgres_ide_source_and_sink.conf new file mode 100644 index 000000000000..52f9c065700c --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_postgres_ide_source_and_sink.conf @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + job.mode = "BATCH" +} + +source{ + jdbc{ + url = "jdbc:postgresql://postgresql:5432/test?loggerLevel=OFF" + driver = "org.postgresql.Driver" + user = "test" + password = "test" + query ="""select gid, text_col, varchar_col, char_col, boolean_col, smallint_col, integer_col, bigint_col, decimal_col, numeric_col, real_col, double_precision_col, + smallserial_col, serial_col, bigserial_col, date_col, timestamp_col, bpchar_col, age, name, point, linestring, polygon_colums, multipoint, + multilinestring, multipolygon, geometrycollection, geog from pg_ide_source_table""" + } +} + + +sink { + Jdbc { + driver = org.postgresql.Driver + url = "jdbc:postgresql://postgresql:5432/test?loggerLevel=OFF" + user = test + password = test + generate_sink_sql = true + field_ide = UPPERCASE + database = test + table = "public.PG_IDE_SINK_TABLE" + primary_keys = ["gid"] + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/sql/oracle_init.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/sql/oracle_init.sql new file mode 100644 index 000000000000..ba77de271ea6 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/sql/oracle_init.sql @@ -0,0 +1,22 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +ALTER SESSION SET CONTAINER = TESTUSER; + +CREATE USER TESTUSER IDENTIFIED BY testPassword; + +GRANT DBA TO TESTUSER; \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcKingbaseIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcKingbaseIT.java new file mode 100644 index 000000000000..17d53bb87d94 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcKingbaseIT.java @@ -0,0 +1,223 @@ +package org.apache.seatunnel.connectors.seatunnel.jdbc; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; +import org.apache.seatunnel.common.utils.ExceptionUtils; + +import org.apache.commons.lang3.tuple.Pair; + +import org.junit.jupiter.api.Disabled; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.utility.DockerLoggerFactory; + +import com.google.common.collect.Lists; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.math.BigDecimal; +import java.sql.SQLException; +import java.sql.Statement; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * If you want to run this e2e, you need to download km license from + * https://www.kingbase.com.cn/sqwjxz/index.htm and modify the KM_LICENSE_PATH variable to the + * address where you downloaded the certificate. Also, remove the @Disabled annotation. 
The spark + * engine does not support the TIME type. Two environment variables need to be added to the spark + * container: "LANG"="C.UTF-8", "JAVA_TOOL_OPTIONS"="-Dfile.encoding=UTF8" + */ +@Slf4j +@Disabled("Due to copyright reasons, you need to download the trial version km license yourself") +public class JdbcKingbaseIT extends AbstractJdbcIT { + private static final String KINGBASE_IMAGE = "huzhihui/kingbase:v8r6"; + private static final String KINGBASE_CONTAINER_HOST = "e2e_KINGBASEDb"; + private static final String KINGBASE_DATABASE = "test"; + private static final String KINGBASE_SCHEMA = "public"; + private static final String KINGBASE_SOURCE = "e2e_table_source"; + private static final String KINGBASE_SINK = "e2e_table_sink"; + + private static final String KINGBASE_USERNAME = "SYSTEM"; + private static final String KINGBASE_PASSWORD = "123456"; + private static final int KINGBASE_PORT = 54321; + private static final String KINGBASE_URL = "jdbc:kingbase8://" + HOST + ":%s/test"; + private static final String DRIVER_CLASS = "com.kingbase8.Driver"; + private static final String KM_LICENSE_PATH = "KM_LICENSE_PATH"; + + private static final List CONFIG_FILE = + Lists.newArrayList("/jdbc_kingbase_source_and_sink.conf"); + private static final String CREATE_SQL = + "create table %s \n" + + "(\n" + + " c1 SMALLSERIAL,\n" + + " c2 SERIAL,\n" + + " c3 BIGSERIAL,\n" + + " c5 INT2,\n" + + " c7 INT4,\n" + + " c9 INT8,\n" + + " c11 FLOAT4,\n" + + " c13 FLOAT8,\n" + + " c15 NUMERIC,\n" + + " c16 BOOL,\n" + + " c18 TIMESTAMP,\n" + + " c19 DATE,\n" + + " c20 TIME,\n" + + " c21 TEXT,\n" + + " c23 BPCHAR,\n" + + " c25 CHARACTER,\n" + + " c26 VARCHAR\n" + + ");\n"; + + @Override + JdbcCase getJdbcCase() { + Map containerEnv = new HashMap<>(); + String jdbcUrl = String.format(KINGBASE_URL, KINGBASE_PORT); + Pair> testDataSet = initTestData(); + String[] fieldNames = testDataSet.getKey(); + + String insertSql = insertTable(KINGBASE_SCHEMA, KINGBASE_SOURCE, fieldNames); + + 
return JdbcCase.builder() + .dockerImage(KINGBASE_IMAGE) + .networkAliases(KINGBASE_CONTAINER_HOST) + .containerEnv(containerEnv) + .driverClass(DRIVER_CLASS) + .host(HOST) + .port(KINGBASE_PORT) + .localPort(KINGBASE_PORT) + .jdbcTemplate(KINGBASE_URL) + .jdbcUrl(jdbcUrl) + .userName(KINGBASE_USERNAME) + .password(KINGBASE_PASSWORD) + .database(KINGBASE_DATABASE) + .sourceTable(KINGBASE_SOURCE) + .sinkTable(KINGBASE_SINK) + .createSql(CREATE_SQL) + .configFile(CONFIG_FILE) + .insertSql(insertSql) + .testData(testDataSet) + .build(); + } + + @Override + void compareResult() throws SQLException, IOException {} + + @Override + String driverUrl() { + return "https://repo1.maven.org/maven2/cn/com/kingbase/kingbase8/8.6.0/kingbase8-8.6.0.jar"; + } + + @Override + Pair> initTestData() { + String[] fieldNames = + new String[] { + "c1", "c2", "c3", "c5", "c7", "c9", "c11", "c13", "c15", "c16", "c18", "c19", + "c20", "c21", "c23", "c25", "c26" + }; + List rows = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + SeaTunnelRow row = + new SeaTunnelRow( + new Object[] { + i, + Long.parseLong(String.valueOf(i)), + Long.parseLong(String.valueOf(i)), + (short) i, + i, + Long.parseLong(String.valueOf(i)), + Float.parseFloat("1.1"), + Double.parseDouble("1.1"), + BigDecimal.valueOf(i, 10), + true, + LocalDateTime.now(), + LocalDate.now(), + LocalTime.now(), + String.valueOf(i), + String.valueOf(i), + String.valueOf(1), + String.valueOf(i) + }); + rows.add(row); + } + + return Pair.of(fieldNames, rows); + } + + @Override + GenericContainer initContainer() { + GenericContainer container = + new GenericContainer<>(KINGBASE_IMAGE) + .withNetwork(NETWORK) + .withNetworkAliases(KINGBASE_CONTAINER_HOST) + .withEnv("KINGBASE_SYSTEM_PASSWORD", KINGBASE_PASSWORD) + .withFileSystemBind(KM_LICENSE_PATH, "/home/kingbase/license.dat") + .withLogConsumer( + new Slf4jLogConsumer( + DockerLoggerFactory.getLogger(KINGBASE_IMAGE))); + container.setPortBindings( + Lists.newArrayList(String.format("%s:%s", 
KINGBASE_PORT, KINGBASE_PORT))); + return container; + } + + protected void createNeededTables() { + try (Statement statement = connection.createStatement()) { + String createTemplate = jdbcCase.getCreateSql(); + + String createSource = + String.format( + createTemplate, KINGBASE_SCHEMA + "." + jdbcCase.getSourceTable()); + String createSink = + String.format(createTemplate, KINGBASE_SCHEMA + "." + jdbcCase.getSinkTable()); + + statement.execute(createSource); + statement.execute(createSink); + + connection.commit(); + } catch (Exception exception) { + log.error(ExceptionUtils.getMessage(exception)); + throw new SeaTunnelRuntimeException(JdbcITErrorCode.CREATE_TABLE_FAILED, exception); + } + } + + public String insertTable(String schema, String table, String... fields) { + String columns = String.join(", ", fields); + String placeholders = Arrays.stream(fields).map(f -> "?").collect(Collectors.joining(", ")); + + return "INSERT INTO " + + schema + + "." + + table + + " (" + + columns + + " )" + + " VALUES (" + + placeholders + + ")"; + } + + public void clearTable(String schema, String table) {} +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIT.java index 6a3eb231b271..a5796c1aaac8 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIT.java @@ -17,6 +17,11 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc; +import org.apache.seatunnel.api.table.catalog.Catalog; +import 
org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.common.utils.JdbcUrlUtil; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.psql.PostgresCatalog; import org.apache.seatunnel.e2e.common.TestResource; import org.apache.seatunnel.e2e.common.TestSuiteBase; import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; @@ -26,6 +31,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestTemplate; import org.testcontainers.containers.Container; import org.testcontainers.containers.PostgreSQLContainer; @@ -95,7 +101,9 @@ public class JdbcPostgresIT extends TestSuiteBase implements TestResource { + " multilinestring geometry(MULTILINESTRING, 4326),\n" + " multipolygon geometry(MULTIPOLYGON, 4326),\n" + " geometrycollection geometry(GEOMETRYCOLLECTION, 4326),\n" - + " geog geography(POINT, 4326)\n" + + " geog geography(POINT, 4326),\n" + + " json_col json NOT NULL,\n" + + " jsonb_col jsonb NOT NULL\n" + ")"; private static final String PG_SINK_DDL = "CREATE TABLE IF NOT EXISTS pg_e2e_sink_table (\n" @@ -126,7 +134,9 @@ public class JdbcPostgresIT extends TestSuiteBase implements TestResource { + " multilinestring varchar(2000) NULL,\n" + " multipolygon varchar(2000) NULL,\n" + " geometrycollection varchar(2000) NULL,\n" - + " geog varchar(2000) NULL\n" + + " geog varchar(2000) NULL,\n" + + " json_col json NOT NULL \n," + + " jsonb_col jsonb NOT NULL\n" + " )"; private static final String SOURCE_SQL = "select \n" @@ -157,8 +167,10 @@ public class JdbcPostgresIT extends TestSuiteBase implements TestResource { + "multilinestring,\n" + "multipolygon,\n" + "geometrycollection,\n" - + "geog\n" - + " from pg_e2e_source_table"; + + "geog,\n" + + "json_col,\n" + + "jsonb_col\n" + + "from pg_e2e_source_table"; private static final 
String SINK_SQL = "select\n" + " gid,\n" @@ -188,7 +200,9 @@ public class JdbcPostgresIT extends TestSuiteBase implements TestResource { + " cast(multilinestring as geometry) as multilinestring,\n" + " cast(multipolygon as geometry) as multilinestring,\n" + " cast(geometrycollection as geometry) as geometrycollection,\n" - + " cast(geog as geography) as geog\n" + + " cast(geog as geography) as geog,\n" + + " json_col,\n" + + " jsonb_col\n" + "from\n" + " pg_e2e_sink_table"; @@ -244,6 +258,43 @@ public void testAutoGenerateSQL(TestContainer container) } } + @Test + public void testCatalog() { + String schema = "public"; + String databaseName = POSTGRESQL_CONTAINER.getDatabaseName(); + String tableName = "pg_e2e_sink_table"; + String catalogDatabaseName = "pg_e2e_catalog_database"; + String catalogTableName = "pg_e2e_catalog_table"; + + Catalog catalog = + new PostgresCatalog( + "postgres", + POSTGRESQL_CONTAINER.getUsername(), + POSTGRESQL_CONTAINER.getPassword(), + JdbcUrlUtil.getUrlInfo(POSTGRESQL_CONTAINER.getJdbcUrl()), + schema); + catalog.open(); + + TablePath tablePath = new TablePath(databaseName, schema, tableName); + TablePath catalogTablePath = new TablePath(catalogDatabaseName, schema, catalogTableName); + + Assertions.assertFalse(catalog.databaseExists(catalogTablePath.getDatabaseName())); + catalog.createDatabase(catalogTablePath, false); + Assertions.assertTrue(catalog.databaseExists(catalogTablePath.getDatabaseName())); + + CatalogTable catalogTable = catalog.getTable(tablePath); + catalog.createTable(catalogTablePath, catalogTable, false); + Assertions.assertTrue(catalog.tableExists(catalogTablePath)); + + catalog.dropTable(catalogTablePath, false); + Assertions.assertFalse(catalog.tableExists(catalogTablePath)); + + catalog.dropDatabase(catalogTablePath, false); + Assertions.assertFalse(catalog.databaseExists(catalogTablePath.getDatabaseName())); + + catalog.close(); + } + private void initializeJdbcTable() { try (Connection connection = 
getJdbcConnection()) { Statement statement = connection.createStatement(); @@ -279,7 +330,9 @@ private void initializeJdbcTable() { + " multilinestring,\n" + " multipolygon,\n" + " geometrycollection,\n" - + " geog\n" + + " geog,\n" + + " json_col,\n" + + " jsonb_col \n" + " )\n" + "VALUES\n" + " (\n" @@ -330,7 +383,9 @@ private void initializeJdbcTable() { + " 'GEOMETRYCOLLECTION(POINT(-122.3462 47.5921), LINESTRING(-122.3460 47.5924, -122.3457 47.5924))',\n" + " 4326\n" + " ),\n" - + " ST_GeographyFromText('POINT(-122.3452 47.5925)')\n" + + " ST_GeographyFromText('POINT(-122.3452 47.5925)'),\n" + + " '{\"key\":\"test\"}',\n" + + " '{\"key\":\"test\"}'\n" + " )"); } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java index f615b6656ea4..0a170ff4bed2 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java @@ -19,6 +19,8 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sqlserver.SqlServerCatalog; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sqlserver.SqlServerURLParser; import org.apache.seatunnel.e2e.common.TestSuiteBase; import org.apache.commons.lang3.tuple.Pair; @@ -44,9 +46,16 @@ public class JdbcSqlServerIT extends AbstractJdbcIT { private static final String SQLSERVER_CONTAINER_HOST = "sqlserver"; private static 
final String SQLSERVER_SOURCE = "source"; private static final String SQLSERVER_SINK = "sink"; + private static final String SQLSERVER_DATABASE = "master"; + private static final String SQLSERVER_SCHEMA = "dbo"; + private static final String SQLSERVER_CATALOG_DATABASE = "catalog_test"; + private static final int SQLSERVER_CONTAINER_PORT = 1433; private static final String SQLSERVER_URL = - "jdbc:sqlserver://" + AbstractJdbcIT.HOST + ":%s;encrypt=false;"; + "jdbc:sqlserver://" + + AbstractJdbcIT.HOST + + ":%s;encrypt=false;databaseName=" + + SQLSERVER_DATABASE; private static final String DRIVER_CLASS = "com.microsoft.sqlserver.jdbc.SQLServerDriver"; private static final List CONFIG_FILE = Lists.newArrayList("/jdbc_sqlserver_source_to_sink.conf"); @@ -81,8 +90,13 @@ JdbcCase getJdbcCase() { .jdbcUrl(jdbcUrl) .userName(username) .password(password) + .database(SQLSERVER_DATABASE) + .schema(SQLSERVER_SCHEMA) .sourceTable(SQLSERVER_SOURCE) .sinkTable(SQLSERVER_SINK) + .catalogDatabase(SQLSERVER_CATALOG_DATABASE) + .catalogSchema(SQLSERVER_SCHEMA) + .catalogTable(SQLSERVER_SINK) .createSql(CREATE_SQL) .configFile(CONFIG_FILE) .insertSql(insertSql) @@ -158,4 +172,22 @@ public String quoteIdentifier(String field) { public void clearTable(String schema, String table) { // do nothing. 
} + + @Override + protected String buildTableInfoWithSchema(String database, String schema, String table) { + return buildTableInfoWithSchema(schema, table); + } + + @Override + protected void initCatalog() { + catalog = + new SqlServerCatalog( + "sqlserver", + jdbcCase.getUserName(), + jdbcCase.getPassword(), + SqlServerURLParser.parse( + jdbcCase.getJdbcUrl().replace(HOST, dbServer.getHost())), + SQLSERVER_SCHEMA); + catalog.open(); + } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_kingbase_source_and_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_kingbase_source_and_sink.conf new file mode 100644 index 000000000000..326fc7272415 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_kingbase_source_and_sink.conf @@ -0,0 +1,43 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + job.mode = "BATCH" +} + +source{ + jdbc{ + driver = "com.kingbase8.Driver" + url = "jdbc:kingbase8://e2e_KINGBASEDb:54321/test" + user = "SYSTEM" + password = "123456" + query ="select * from public.e2e_table_source" + } +} + + +sink { + jdbc{ + driver = "com.kingbase8.Driver" + url = "jdbc:kingbase8://e2e_KINGBASEDb:54321/test" + user = "SYSTEM" + password = "123456" + query ="INSERT INTO public.e2e_table_sink (c1, c2, c3, c5, c7, c9, c11, c13, c15, c16, c18, c19, c20, c21, c23, c25, c26) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" + } +} + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink.conf index f3293f44e610..7a34a4f49c05 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink.conf @@ -28,7 +28,7 @@ source{ password = "test" query ="""select gid, text_col, varchar_col, char_col, boolean_col, smallint_col, integer_col, bigint_col, decimal_col, numeric_col, real_col, double_precision_col, smallserial_col, serial_col, bigserial_col, date_col, timestamp_col, bpchar_col, age, name, point, linestring, polygon_colums, multipoint, - multilinestring, multipolygon, geometrycollection, geog from pg_e2e_source_table""" + multilinestring, multipolygon, geometrycollection, geog, json_col, jsonb_col from pg_e2e_source_table""" } } @@ -36,7 +36,7 @@ source{ sink { Jdbc { driver = org.postgresql.Driver - url = "jdbc:postgresql://postgresql:5432/test?loggerLevel=OFF" + url = 
"jdbc:postgresql://postgresql:5432/test?loggerLevel=OFF&stringtype=unspecified" user = test password = test generate_sink_sql = true diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_parallel.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_parallel.conf index 25df382c4af8..58feafe102ae 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_parallel.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_parallel.conf @@ -28,7 +28,7 @@ source{ password = "test" query ="""select gid, text_col, varchar_col, char_col, boolean_col, smallint_col, integer_col, bigint_col, decimal_col, numeric_col, real_col, double_precision_col, smallserial_col, serial_col, bigserial_col, date_col, timestamp_col, bpchar_col, age, name, point, linestring, polygon_colums, multipoint, - multilinestring, multipolygon, geometrycollection, geog from pg_e2e_source_table""" + multilinestring, multipolygon, geometrycollection, geog, json_col, jsonb_col from pg_e2e_source_table""" partition_column= "gid" result_table_name = "jdbc" @@ -40,14 +40,14 @@ transform { sink { jdbc { - url = "jdbc:postgresql://postgresql:5432/test" + url = "jdbc:postgresql://postgresql:5432/test?stringtype=unspecified" driver = "org.postgresql.Driver" user = "test" password = "test" connection_check_timeout_sec = 100 query ="""INSERT INTO pg_e2e_sink_table ( gid, text_col, varchar_col, char_col, boolean_col, smallint_col, integer_col, bigint_col, decimal_col, numeric_col, real_col, double_precision_col, smallserial_col, serial_col, bigserial_col, date_col, timestamp_col, bpchar_col, age, name, point, - linestring, polygon_colums, multipoint, 
multilinestring, multipolygon, geometrycollection, geog ) - VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )""" + linestring, polygon_colums, multipoint, multilinestring, multipolygon, geometrycollection, geog, json_col, jsonb_col) + VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )""" } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_parallel_upper_lower.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_parallel_upper_lower.conf index 46f1b43022b0..4a98ab64776f 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_parallel_upper_lower.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_parallel_upper_lower.conf @@ -28,7 +28,7 @@ source{ password = "test" query ="""select gid, text_col, varchar_col, char_col, boolean_col, smallint_col, integer_col, bigint_col, decimal_col, numeric_col, real_col, double_precision_col, smallserial_col, serial_col, bigserial_col, date_col, timestamp_col, bpchar_col, age, name, point, linestring, polygon_colums, multipoint, - multilinestring, multipolygon, geometrycollection, geog from pg_e2e_source_table""" + multilinestring, multipolygon, geometrycollection, geog, json_col, jsonb_col from pg_e2e_source_table""" partition_column= "gid" result_table_name = "jdbc" @@ -43,7 +43,7 @@ transform { sink { jdbc { - url = "jdbc:postgresql://postgresql:5432/test" + url = "jdbc:postgresql://postgresql:5432/test?stringtype=unspecified" driver = "org.postgresql.Driver" user = "test" @@ -51,7 +51,7 @@ sink { connection_check_timeout_sec = 100 query ="""INSERT INTO pg_e2e_sink_table 
( gid, text_col, varchar_col, char_col, boolean_col, smallint_col, integer_col, bigint_col, decimal_col, numeric_col, real_col, double_precision_col, smallserial_col, serial_col, bigserial_col, date_col, timestamp_col, bpchar_col, age, name, point, - linestring, polygon_colums, multipoint, multilinestring, multipolygon, geometrycollection, geog ) - VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )""" + linestring, polygon_colums, multipoint, multilinestring, multipolygon, geometrycollection, geog, json_col, jsonb_col ) + VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )""" } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_xa.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_xa.conf index ba32ca81bc19..d135b19376ae 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_xa.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/resources/jdbc_postgres_source_and_sink_xa.conf @@ -29,7 +29,7 @@ source { password = "test" query ="""select gid, text_col, varchar_col, char_col, boolean_col, smallint_col, integer_col, bigint_col, decimal_col, numeric_col, real_col, double_precision_col, smallserial_col, serial_col, bigserial_col, date_col, timestamp_col, bpchar_col, age, name, point, linestring, polygon_colums, multipoint, - multilinestring, multipolygon, geometrycollection, geog from pg_e2e_source_table""" + multilinestring, multipolygon, geometrycollection, geog, json_col, jsonb_col from pg_e2e_source_table""" } } @@ -38,15 +38,15 @@ transform { sink { jdbc { - url = "jdbc:postgresql://postgresql:5432/test" + url = 
"jdbc:postgresql://postgresql:5432/test?stringtype=unspecified" driver = "org.postgresql.Driver" user = "test" password = "test" max_retries = 0 query ="""INSERT INTO pg_e2e_sink_table ( gid, text_col, varchar_col, char_col, boolean_col, smallint_col, integer_col, bigint_col, decimal_col, numeric_col, real_col, double_precision_col, smallserial_col, serial_col, bigserial_col, date_col, timestamp_col, bpchar_col, age, name, point, - linestring, polygon_colums, multipoint, multilinestring, multipolygon, geometrycollection, geog ) - VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )""" + linestring, polygon_colums, multipoint, multilinestring, multipolygon, geometrycollection, geog, json_col, jsonb_col ) + VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )""" is_exactly_once = "true" diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/pom.xml new file mode 100644 index 000000000000..fb4923020af4 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/pom.xml @@ -0,0 +1,49 @@ + + + + 4.0.0 + + org.apache.seatunnel + connector-jdbc-e2e + ${revision} + + + connector-jdbc-e2e-part-5 + SeaTunnel : E2E : Connector V2 : Jdbc : Part 5 + + + + org.apache.seatunnel + connector-jdbc-e2e-common + ${project.version} + test-jar + test + + + + + mysql + mysql-connector-java + test + + + com.dameng + DmJdbcDriver18 + test + + + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmIT.java similarity index 100% rename from 
seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmIT.java rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmIT.java diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmUpsetIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmUpsetIT.java similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmUpsetIT.java rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmUpsetIT.java diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDorisIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDorisIT.java similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDorisIT.java rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDorisIT.java diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDorisdbIT.java 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDorisdbIT.java similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDorisdbIT.java rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDorisdbIT.java diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGBase8aIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGBase8aIT.java similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGBase8aIT.java rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGBase8aIT.java diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGreenplumIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGreenplumIT.java similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGreenplumIT.java rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGreenplumIT.java 
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/doris-jdbc-to-doris.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/doris-jdbc-to-doris.conf similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/doris-jdbc-to-doris.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/doris-jdbc-to-doris.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_dm_source_and_dm_upset_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_dm_source_and_dm_upset_sink.conf similarity index 97% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_dm_source_and_dm_upset_sink.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_dm_source_and_dm_upset_sink.conf index 96046c88f829..81104b79077c 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_dm_source_and_dm_upset_sink.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_dm_source_and_dm_upset_sink.conf @@ -40,7 +40,7 @@ sink { user = "SYSDBA" password = "SYSDBA" database = "SYSDBA" - primary_keys = [DM_BIT] + primary_keys = ["DM_BIT"] table = "E2E_TABLE_SINK_UPSET" generate_sink_sql = true query = "" diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_dm_source_and_sink.conf 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_dm_source_and_sink.conf similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_dm_source_and_sink.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_dm_source_and_sink.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_doris_source_and_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_doris_source_and_sink.conf similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_doris_source_and_sink.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_doris_source_and_sink.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_gbase8a_source_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_gbase8a_source_to_assert.conf similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_gbase8a_source_to_assert.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_gbase8a_source_to_assert.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_greenplum_source_and_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_greenplum_source_and_sink.conf similarity index 100% rename 
from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/resources/jdbc_greenplum_source_and_sink.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/resources/jdbc_greenplum_source_and_sink.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/pom.xml index 0b3e18bdbf25..1dabc25490bd 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/pom.xml @@ -32,6 +32,7 @@ connector-jdbc-e2e-part-2 connector-jdbc-e2e-part-3 connector-jdbc-e2e-part-4 + connector-jdbc-e2e-part-5 diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/pom.xml index 81cbb7856984..fa2e1930cce4 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/pom.xml @@ -92,6 +92,11 @@ postgresql test + + mysql + mysql-connector-java + test + org.testcontainers mysql diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaConnectToKafkaIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaConnectToKafkaIT.java new file mode 100644 index 000000000000..591049917f8f --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaConnectToKafkaIT.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.e2e.connector.kafka; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.seatunnel.connectors.seatunnel.cdc.mysql.testutils.MySqlContainer; +import org.apache.seatunnel.connectors.seatunnel.cdc.mysql.testutils.MySqlVersion; +import org.apache.seatunnel.e2e.common.TestResource; +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; +import org.apache.seatunnel.e2e.common.container.EngineType; +import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; + +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.serialization.ByteArraySerializer; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestTemplate; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.Container; +import org.testcontainers.containers.KafkaContainer; +import 
org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.lifecycle.Startables; +import org.testcontainers.utility.DockerImageName; +import org.testcontainers.utility.DockerLoggerFactory; + +import com.google.common.collect.Lists; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + +import static org.awaitility.Awaitility.given; + +@Slf4j +@DisabledOnContainer( + value = {}, + type = {EngineType.SPARK}) +public class KafkaConnectToKafkaIT extends TestSuiteBase implements TestResource { + private static final Logger LOG = LoggerFactory.getLogger(KafkaConnectToKafkaIT.class); + private final ObjectMapper objectMapper = new ObjectMapper(); + // kafka + private static final String KAFKA_IMAGE_NAME = "confluentinc/cp-kafka:latest"; + + private static final String KAFKA_JDBC_TOPIC = "jdbc_source_record"; + + private static final String KAFKA_HOST = "kafka_connect_source_record"; + + private static KafkaContainer KAFKA_CONTAINER; + + private KafkaProducer kafkaProducer; + + // -----------------------------------mysql----------------------------------------- + private static MySqlContainer MYSQL_CONTAINER; + private static final String MYSQL_DATABASE = "seatunnel"; + private static final String MYSQL_HOST = "kafka_to_mysql_e2e"; + private static final int MYSQL_PORT = 3306; + private static final String MYSQL_DRIVER_JAR = + "https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.0.32/mysql-connector-j-8.0.32.jar"; + + @TestContainerExtension + private final ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + 
container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/Jdbc/lib && cd /tmp/seatunnel/plugins/Jdbc/lib && curl -O " + + MYSQL_DRIVER_JAR); + Assertions.assertEquals(0, extraCommands.getExitCode()); + }; + + private static MySqlContainer createMySqlContainer(MySqlVersion version) { + MySqlContainer mySqlContainer = + new MySqlContainer(version) + .withConfigurationOverride("docker/server-gtids/my.cnf") + .withSetupSQL("docker/setup.sql") + .withNetwork(NETWORK) + .withNetworkAliases(MYSQL_HOST) + .withDatabaseName("seatunnel") + .withUsername("st_user") + .withPassword("seatunnel") + .withLogConsumer(new Slf4jLogConsumer(LOG)); + mySqlContainer.setPortBindings( + com.google.common.collect.Lists.newArrayList( + String.format("%s:%s", MYSQL_PORT, MYSQL_PORT))); + return mySqlContainer; + } + + private void createKafkaContainer() { + KAFKA_CONTAINER = + new KafkaContainer(DockerImageName.parse(KAFKA_IMAGE_NAME)) + .withNetwork(NETWORK) + .withNetworkAliases(KAFKA_HOST) + .withLogConsumer( + new Slf4jLogConsumer( + DockerLoggerFactory.getLogger(KAFKA_IMAGE_NAME))); + } + + @BeforeAll + @Override + public void startUp() { + + LOG.info("The first stage: Starting Kafka containers..."); + createKafkaContainer(); + Startables.deepStart(Stream.of(KAFKA_CONTAINER)).join(); + LOG.info("Kafka Containers are started"); + + given().ignoreExceptions() + .atLeast(100, TimeUnit.MILLISECONDS) + .pollInterval(500, TimeUnit.MILLISECONDS) + .atMost(2, TimeUnit.MINUTES) + .untilAsserted(this::initKafkaProducer); + + LOG.info("The second stage: Starting Mysql containers..."); + MYSQL_CONTAINER = createMySqlContainer(MySqlVersion.V8_0); + Startables.deepStart(Stream.of(MYSQL_CONTAINER)).join(); + LOG.info("Mysql Containers are started"); + + given().ignoreExceptions() + .await() + .atLeast(100, TimeUnit.MILLISECONDS) + .pollInterval(500, TimeUnit.MILLISECONDS) + .atMost(2, TimeUnit.MINUTES) + .untilAsserted(this::initializeDatabase); + + given().ignoreExceptions() + 
.await() + .atLeast(100, TimeUnit.MILLISECONDS) + .pollInterval(500, TimeUnit.MILLISECONDS) + .atMost(2, TimeUnit.MINUTES) + .untilAsserted(this::initializeJdbcTable); + + log.info("Write 3 records to topic " + KAFKA_JDBC_TOPIC); + generateConnectJdbcRecord(); + } + + @TestTemplate + public void testJdbcRecordKafkaToMysql(TestContainer container) + throws IOException, InterruptedException, SQLException { + Container.ExecResult execResult = + container.executeJob("/kafkasource_jdbc_record_to_mysql.conf"); + Assertions.assertEquals(0, execResult.getExitCode(), execResult.getStderr()); + List actual = new ArrayList<>(); + try (Connection connection = + DriverManager.getConnection( + MYSQL_CONTAINER.getJdbcUrl(), + MYSQL_CONTAINER.getUsername(), + MYSQL_CONTAINER.getPassword())) { + try (Statement statement = connection.createStatement()) { + ResultSet resultSet = + statement.executeQuery("select * from seatunnel.jdbc_sink order by id"); + while (resultSet.next()) { + List row = + Arrays.asList( + resultSet.getInt("id"), + resultSet.getString("name"), + resultSet.getString("description"), + resultSet.getString("weight")); + actual.add(row); + } + } + } + List expected = + Lists.newArrayList( + Arrays.asList(15, "test", "test", "20"), + Arrays.asList(16, "test-001", "test", "30"), + Arrays.asList(18, "sdc", "sdc", "sdc")); + Assertions.assertIterableEquals(expected, actual); + + try (Connection connection = + DriverManager.getConnection( + MYSQL_CONTAINER.getJdbcUrl(), + MYSQL_CONTAINER.getUsername(), + MYSQL_CONTAINER.getPassword())) { + try (Statement statement = connection.createStatement()) { + statement.execute("truncate table seatunnel.jdbc_sink"); + LOG.info("testJdbcRecordKafkaToMysql truncate table sink"); + } + } + } + + @SneakyThrows + public void generateConnectJdbcRecord() { + String[] jdbcSourceRecords = { + 
"{\"schema\":{\"type\":\"struct\",\"fields\":[{\"type\":\"int64\",\"optional\":false,\"field\":\"id\"},{\"type\":\"string\",\"optional\":true,\"field\":\"name\"},{\"type\":\"string\",\"optional\":true,\"field\":\"description\"},{\"type\":\"string\",\"optional\":true,\"field\":\"weight\"}],\"optional\":false,\"name\":\"test_database_001.seatunnel_test_cdc\"},\"payload\":{\"id\":15,\"name\":\"test\",\"description\":\"test\",\"weight\":\"20\"}}", + "{\"schema\":{\"type\":\"struct\",\"fields\":[{\"type\":\"int64\",\"optional\":false,\"field\":\"id\"},{\"type\":\"string\",\"optional\":true,\"field\":\"name\"},{\"type\":\"string\",\"optional\":true,\"field\":\"description\"},{\"type\":\"string\",\"optional\":true,\"field\":\"weight\"}],\"optional\":false,\"name\":\"test_database_001.seatunnel_test_cdc\"},\"payload\":{\"id\":16,\"name\":\"test-001\",\"description\":\"test\",\"weight\":\"30\"}}", + "{\"schema\":{\"type\":\"struct\",\"fields\":[{\"type\":\"int64\",\"optional\":false,\"field\":\"id\"},{\"type\":\"string\",\"optional\":true,\"field\":\"name\"},{\"type\":\"string\",\"optional\":true,\"field\":\"description\"},{\"type\":\"string\",\"optional\":true,\"field\":\"weight\"}],\"optional\":false,\"name\":\"test_database_001.seatunnel_test_cdc\"},\"payload\":{\"id\":18,\"name\":\"sdc\",\"description\":\"sdc\",\"weight\":\"sdc\"}}" + }; + for (String value : jdbcSourceRecords) { + JsonNode jsonNode = objectMapper.readTree(value); + byte[] bytes = objectMapper.writeValueAsBytes(jsonNode); + ProducerRecord producerRecord = + new ProducerRecord<>(KAFKA_JDBC_TOPIC, null, bytes); + kafkaProducer.send(producerRecord).get(); + } + } + + private void initKafkaProducer() { + Properties props = new Properties(); + String bootstrapServers = KAFKA_CONTAINER.getBootstrapServers(); + props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); + props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class); + 
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class); + kafkaProducer = new KafkaProducer<>(props); + } + + @Override + public void tearDown() { + MYSQL_CONTAINER.close(); + KAFKA_CONTAINER.close(); + } + + protected void initializeDatabase() { + try (Connection connection = + DriverManager.getConnection( + MYSQL_CONTAINER.getJdbcUrl(), + MYSQL_CONTAINER.getUsername(), + MYSQL_CONTAINER.getPassword())) { + Statement statement = connection.createStatement(); + String sql = "CREATE DATABASE IF NOT EXISTS " + MYSQL_DATABASE; + statement.execute(sql); + } catch (SQLException e) { + throw new RuntimeException("Initializing Mysql database failed!", e); + } + } + + private void initializeJdbcTable() { + try (Connection connection = + DriverManager.getConnection( + MYSQL_CONTAINER.getJdbcUrl(), + MYSQL_CONTAINER.getUsername(), + MYSQL_CONTAINER.getPassword())) { + Statement statement = connection.createStatement(); + String jdbcSink = + "CREATE TABLE IF NOT EXISTS seatunnel.jdbc_sink(\n" + + "id INT NOT NULL PRIMARY KEY,\n" + + "name varchar(255),\n" + + "description varchar(255),\n" + + "weight varchar(255)" + + ")"; + statement.execute(jdbcSink); + } catch (SQLException e) { + throw new RuntimeException("Initializing Mysql table failed!", e); + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java index 922798c3ded2..c72eb9d5c38c 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java @@ -331,7 +331,6 @@ private Properties kafkaConsumerConfig() { return props; } - 
@SuppressWarnings("checkstyle:Indentation") private void generateTestData(ProducerRecordConverter converter, int start, int end) { for (int i = start; i < end; i++) { SeaTunnelRow row = diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafkasource_jdbc_record_to_mysql.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafkasource_jdbc_record_to_mysql.conf new file mode 100644 index 000000000000..36ae276e0349 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafkasource_jdbc_record_to_mysql.conf @@ -0,0 +1,63 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + execution.parallelism = 1 + job.mode = "BATCH" + + #spark config + spark.app.name = "SeaTunnel" + spark.executor.instances = 1 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local +} + +source { + Kafka { + bootstrap.servers = "kafka_connect_source_record:9092" + topic = "jdbc_source_record" + result_table_name = "kafka_table" + start_mode = earliest + schema = { + fields { + id = "int" + name = "string" + description = "string" + weight = "string" + } + }, + format = COMPATIBLE_KAFKA_CONNECT_JSON + } +} + + +sink { + Jdbc { + driver = com.mysql.cj.jdbc.Driver + url = "jdbc:mysql://kafka_to_mysql_e2e:3306/seatunnel" + user = st_user + password = seatunnel + generate_sink_sql = true + database = seatunnel + table = jdbc_sink + primary_keys = ["id"] + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-redis-e2e/src/test/java/org/apache/seatunnel/e2e/connector/redis/RedisIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-redis-e2e/src/test/java/org/apache/seatunnel/e2e/connector/redis/RedisIT.java index 808f6860337e..bd4a9063ba13 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-redis-e2e/src/test/java/org/apache/seatunnel/e2e/connector/redis/RedisIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-redis-e2e/src/test/java/org/apache/seatunnel/e2e/connector/redis/RedisIT.java @@ -192,4 +192,15 @@ public void testRedis(TestContainer container) throws IOException, InterruptedEx jedis.del("key_list"); Assertions.assertEquals(0, jedis.llen("key_list")); } + + @TestTemplate + public void testRedisWithExpire(TestContainer container) + throws IOException, InterruptedException { + Container.ExecResult execResult = container.executeJob("/redis-to-redis-expire.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertEquals(100, 
jedis.llen("key_list")); + // Clear data to prevent data duplication in the next TestContainer + Thread.sleep(60 * 1000); + Assertions.assertEquals(0, jedis.llen("key_list")); + } } diff --git a/.github/workflows/code-analysys.yml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-redis-e2e/src/test/resources/redis-to-redis-expire.conf similarity index 51% rename from .github/workflows/code-analysys.yml rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-redis-e2e/src/test/resources/redis-to-redis-expire.conf index d4a0227da163..4a42bd3a46af 100644 --- a/.github/workflows/code-analysys.yml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-redis-e2e/src/test/resources/redis-to-redis-expire.conf @@ -1,3 +1,4 @@ +# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -5,38 +6,45 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# + +env { + execution.parallelism = 1 + job.mode = "BATCH" + shade.identifier = "base64" + + #spark config + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local +} + +source { + Redis { + host = "redis-e2e" + port = 6379 + auth = "U2VhVHVubmVs" + keys = "key_test*" + data_type = key + } +} -name: SonarCloud -on: - push: - pull_request: - branches: [dev] - paths-ignore: - - 'docs/**' - - '**/*.md' - - 'seatunnel-ui/**' -jobs: - build: - runs-on: ubuntu-latest - timeout-minutes: 120 - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Set up JDK 11 - uses: actions/setup-java@v2 - with: - java-version: 11 - distribution: 'adopt' - - name: Run SonarCloud Analysis - run: bash ./tools/sonarcheck/check.sh - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SONAR_TOKEN: ${{ secrets.SONARCLOUD_TOKEN }} \ No newline at end of file +sink { + Redis { + host = "redis-e2e" + port = 6379 + auth = "U2VhVHVubmVs" + key = "key_list" + data_type = list + expire = 30 + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rocketmq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/rocketmq/RocketMqIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rocketmq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/rocketmq/RocketMqIT.java index f292d1120121..aba0a9f2c077 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rocketmq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/rocketmq/RocketMqIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rocketmq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/rocketmq/RocketMqIT.java @@ -282,7 +282,6 @@ public void testRocketMqGroupOffsetsToConsole(TestContainer container) } @SneakyThrows - @SuppressWarnings("checkstyle:Indentation") private void generateTestData( ProducerRecordConverter converter, String topic, int start, int end) { for (int i = 
start; i < end; i++) { diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainerId.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainerId.java index 485fe2df7711..c50712820038 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainerId.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainerId.java @@ -32,7 +32,7 @@ public enum TestContainerId { FLINK_1_16(FLINK, "1.16.0"), SPARK_2_4(SPARK, "2.4.6"), SPARK_3_3(SPARK, "3.3.0"), - SEATUNNEL(EngineType.SEATUNNEL, "2.3.1"); + SEATUNNEL(EngineType.SEATUNNEL, "dev"); private final EngineType engineType; private final String version; diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/pom.xml b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/pom.xml index a1315565349f..20a2e612a6f6 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/pom.xml +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/pom.xml @@ -92,6 +92,18 @@ ${netty-buffer.version} test + + org.apache.seatunnel + seatunnel-transforms-v2 + ${project.version} + test + + + org.apache.seatunnel + seatunnel-api + ${project.version} + test + diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java index f7571968e8ff..1b547be73b93 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java 
@@ -50,7 +50,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; /** * Cluster fault tolerance test. Test the job recovery capability and data consistency assurance @@ -68,7 +68,6 @@ public class ClusterFaultToleranceIT { public static final String DYNAMIC_TEST_PARALLELISM = "dynamic_test_parallelism"; - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testBatchJobRunOkIn2Node() throws ExecutionException, InterruptedException { String testCaseName = "testBatchJobRunOkIn2Node"; @@ -193,7 +192,6 @@ private ImmutablePair createTestResources( return new ImmutablePair<>(targetDir, targetConfigFilePath); } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testStreamJobRunOkIn2Node() throws ExecutionException, InterruptedException { String testCaseName = "testStreamJobRunOkIn2Node"; @@ -286,7 +284,6 @@ public void testStreamJobRunOkIn2Node() throws ExecutionException, InterruptedEx } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testBatchJobRestoreIn2NodeWorkerDown() throws ExecutionException, InterruptedException { @@ -386,7 +383,6 @@ public void testBatchJobRestoreIn2NodeWorkerDown() } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testStreamJobRestoreIn2NodeWorkerDown() throws ExecutionException, InterruptedException { @@ -505,7 +501,6 @@ public void testStreamJobRestoreIn2NodeWorkerDown() } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testBatchJobRestoreIn2NodeMasterDown() throws ExecutionException, InterruptedException { @@ -608,7 +603,6 @@ public void testBatchJobRestoreIn2NodeMasterDown() } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testStreamJobRestoreIn2NodeMasterDown() throws ExecutionException, InterruptedException { @@ -736,7 
+730,6 @@ public void testFor() throws ExecutionException, InterruptedException { } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testStreamJobRestoreInAllNodeDown() throws ExecutionException, InterruptedException { @@ -936,7 +929,6 @@ public void testStreamJobRestoreInAllNodeDown() } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test @Disabled public void testStreamJobRestoreFromOssInAllNodeDown() diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceTwoPipelineIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceTwoPipelineIT.java index e99940defec6..3c677b45f3d3 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceTwoPipelineIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceTwoPipelineIT.java @@ -48,7 +48,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; /** * Cluster fault tolerance test. 
Test the job which have two pipelines can recovery capability and @@ -69,7 +69,6 @@ public class ClusterFaultToleranceTwoPipelineIT { public static final String DYNAMIC_TEST_PARALLELISM = "dynamic_test_parallelism"; - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testTwoPipelineBatchJobRunOkIn2Node() throws ExecutionException, InterruptedException { @@ -200,7 +199,6 @@ private ImmutablePair createTestResources( return new ImmutablePair<>(targetDir, targetConfigFilePath); } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testTwoPipelineStreamJobRunOkIn2Node() throws ExecutionException, InterruptedException { @@ -299,7 +297,6 @@ public void testTwoPipelineStreamJobRunOkIn2Node() } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testTwoPipelineBatchJobRestoreIn2NodeWorkerDown() throws ExecutionException, InterruptedException { @@ -415,7 +412,6 @@ public void testFor() throws ExecutionException, InterruptedException { } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testTwoPipelineStreamJobRestoreIn2NodeWorkerDown() throws ExecutionException, InterruptedException { @@ -544,13 +540,14 @@ public void testTwoPipelineStreamJobRestoreIn2NodeWorkerDown() } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testTwoPipelineBatchJobRestoreIn2NodeMasterDown() throws ExecutionException, InterruptedException { - String testCaseName = "testTwoPipelineBatchJobRestoreIn2NodeMasterDown"; + String testCaseName = + "testTwoPipelineBatchJobRestoreIn2NodeMasterDown" + System.currentTimeMillis(); String testClusterName = - "ClusterFaultToleranceTwoPipelineIT_testTwoPipelineBatchJobRestoreIn2NodeMasterDown"; + "ClusterFaultToleranceTwoPipelineIT_testTwoPipelineBatchJobRestoreIn2NodeMasterDown" + + System.currentTimeMillis(); long testRowNumber = 1000; int testParallelism = 6; HazelcastInstanceImpl node1 = null; @@ -653,13 +650,14 @@ public void 
testTwoPipelineBatchJobRestoreIn2NodeMasterDown() } } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testTwoPipelineStreamJobRestoreIn2NodeMasterDown() throws ExecutionException, InterruptedException { - String testCaseName = "testTwoPipelineStreamJobRestoreIn2NodeMasterDown"; + String testCaseName = + "testTwoPipelineStreamJobRestoreIn2NodeMasterDown" + System.currentTimeMillis(); String testClusterName = - "ClusterFaultToleranceTwoPipelineIT_testTwoPipelineStreamJobRestoreIn2NodeMasterDown"; + "ClusterFaultToleranceTwoPipelineIT_testTwoPipelineStreamJobRestoreIn2NodeMasterDown" + + System.currentTimeMillis(); long testRowNumber = 1000; int testParallelism = 6; HazelcastInstanceImpl node1 = null; diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobExecutionIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobExecutionIT.java index 4609a10dc4c5..4ecee663ae52 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobExecutionIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobExecutionIT.java @@ -24,13 +24,13 @@ import org.apache.seatunnel.engine.client.job.JobExecutionEnvironment; import org.apache.seatunnel.engine.common.config.ConfigProvider; import org.apache.seatunnel.engine.common.config.JobConfig; +import org.apache.seatunnel.engine.core.job.JobResult; import org.apache.seatunnel.engine.core.job.JobStatus; import org.apache.seatunnel.engine.server.SeaTunnelServerStarter; -import org.awaitility.Awaitility; -import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import 
com.hazelcast.client.config.ClientConfig; @@ -38,15 +38,18 @@ import lombok.extern.slf4j.Slf4j; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import static org.awaitility.Awaitility.await; + @Slf4j public class JobExecutionIT { private static HazelcastInstanceImpl hazelcastInstance; - @BeforeAll - public static void beforeClass() throws Exception { + @BeforeEach + public void beforeClass() { hazelcastInstance = SeaTunnelServerStarter.createHazelcastInstance( TestUtils.getClusterName("JobExecutionIT")); @@ -79,13 +82,9 @@ public void testExecuteJob() throws Exception { final ClientJobProxy clientJobProxy = jobExecutionEnv.execute(); CompletableFuture objectCompletableFuture = - CompletableFuture.supplyAsync( - () -> { - return clientJobProxy.waitForJobComplete(); - }); + CompletableFuture.supplyAsync(clientJobProxy::waitForJobComplete); - Awaitility.await() - .atMost(600000, TimeUnit.MILLISECONDS) + await().atMost(600000, TimeUnit.MILLISECONDS) .untilAsserted( () -> Assertions.assertTrue( @@ -110,17 +109,12 @@ public void cancelJobTest() throws Exception { final ClientJobProxy clientJobProxy = jobExecutionEnv.execute(); JobStatus jobStatus1 = clientJobProxy.getJobStatus(); Assertions.assertFalse(jobStatus1.isEndState()); - ClientJobProxy finalClientJobProxy = clientJobProxy; CompletableFuture objectCompletableFuture = - CompletableFuture.supplyAsync( - () -> { - return finalClientJobProxy.waitForJobComplete(); - }); + CompletableFuture.supplyAsync(clientJobProxy::waitForJobComplete); Thread.sleep(1000); clientJobProxy.cancelJob(); - Awaitility.await() - .atMost(20000, TimeUnit.MILLISECONDS) + await().atMost(20000, TimeUnit.MILLISECONDS) .untilAsserted( () -> Assertions.assertTrue( @@ -129,8 +123,75 @@ public void cancelJobTest() throws Exception { objectCompletableFuture.get()))); } - @AfterAll - static void afterClass() { + @Test + public void testGetErrorInfo() throws 
ExecutionException, InterruptedException { + Common.setDeployMode(DeployMode.CLIENT); + String filePath = TestUtils.getResource("batch_fakesource_to_console_error.conf"); + JobConfig jobConfig = new JobConfig(); + jobConfig.setName("fake_to_console_error"); + ClientConfig clientConfig = ConfigProvider.locateAndGetClientConfig(); + clientConfig.setClusterName(TestUtils.getClusterName("JobExecutionIT")); + SeaTunnelClient engineClient = new SeaTunnelClient(clientConfig); + JobExecutionEnvironment jobExecutionEnv = + engineClient.createExecutionContext(filePath, jobConfig); + final ClientJobProxy clientJobProxy = jobExecutionEnv.execute(); + CompletableFuture completableFuture = + CompletableFuture.supplyAsync(clientJobProxy::waitForJobComplete); + await().atMost(600000, TimeUnit.MILLISECONDS) + .untilAsserted(() -> Assertions.assertTrue(completableFuture.isDone())); + + JobResult result = clientJobProxy.getJobResultCache(); + Assertions.assertEquals(result.getStatus(), JobStatus.FAILED); + Assertions.assertTrue(result.getError().startsWith("java.lang.NumberFormatException")); + } + + @Test + public void testGetUnKnownJobID() { + + ClientConfig clientConfig = ConfigProvider.locateAndGetClientConfig(); + clientConfig.setClusterName(TestUtils.getClusterName("JobExecutionIT")); + SeaTunnelClient engineClient = new SeaTunnelClient(clientConfig); + + ClientJobProxy newClientJobProxy = + engineClient.createJobClient().getJobProxy(System.currentTimeMillis()); + CompletableFuture waitForJobCompleteFuture = + CompletableFuture.supplyAsync(newClientJobProxy::waitForJobComplete); + + await().atMost(20000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + JobStatus.UNKNOWABLE, waitForJobCompleteFuture.get())); + + Assertions.assertEquals( + "UNKNOWABLE", engineClient.getJobClient().getJobStatus(System.currentTimeMillis())); + } + + @Test + public void testExpiredJobWasDeleted() throws Exception { + Common.setDeployMode(DeployMode.CLIENT); + String 
filePath = TestUtils.getResource("batch_fakesource_to_file.conf"); + JobConfig jobConfig = new JobConfig(); + jobConfig.setName("job_expire"); + + ClientConfig clientConfig = ConfigProvider.locateAndGetClientConfig(); + clientConfig.setClusterName(TestUtils.getClusterName("JobExecutionIT")); + SeaTunnelClient engineClient = new SeaTunnelClient(clientConfig); + JobExecutionEnvironment jobExecutionEnv = + engineClient.createExecutionContext(filePath, jobConfig); + + final ClientJobProxy clientJobProxy = jobExecutionEnv.execute(); + + Assertions.assertEquals(clientJobProxy.waitForJobComplete(), JobStatus.FINISHED); + await().atMost(65, TimeUnit.SECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + JobStatus.UNKNOWABLE, clientJobProxy.getJobStatus())); + } + + @AfterEach + void afterClass() { if (hazelcastInstance != null) { hazelcastInstance.shutdown(); } diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java index 5f4e97ac8d50..d38d1c732f19 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java @@ -22,10 +22,12 @@ import org.apache.seatunnel.engine.client.SeaTunnelClient; import org.apache.seatunnel.engine.client.job.ClientJobProxy; import org.apache.seatunnel.engine.client.job.JobExecutionEnvironment; +import org.apache.seatunnel.engine.common.Constant; import org.apache.seatunnel.engine.common.config.ConfigProvider; import org.apache.seatunnel.engine.common.config.JobConfig; import org.apache.seatunnel.engine.common.config.SeaTunnelConfig; import org.apache.seatunnel.engine.core.job.JobStatus; +import 
org.apache.seatunnel.engine.server.SeaTunnelServer; import org.apache.seatunnel.engine.server.SeaTunnelServerStarter; import org.apache.seatunnel.engine.server.rest.RestConstant; @@ -37,6 +39,7 @@ import com.hazelcast.client.config.ClientConfig; import com.hazelcast.instance.impl.HazelcastInstanceImpl; +import io.restassured.response.Response; import lombok.extern.slf4j.Slf4j; import java.util.concurrent.TimeUnit; @@ -131,6 +134,75 @@ public void testSystemMonitoringInformation() { .statusCode(200); } + @Test + public void testSubmitJob() { + String requestBody = + "{\n" + + " \"env\": {\n" + + " \"job.mode\": \"batch\"\n" + + " },\n" + + " \"source\": [\n" + + " {\n" + + " \"plugin_name\": \"FakeSource\",\n" + + " \"result_table_name\": \"fake\",\n" + + " \"row.num\": 100,\n" + + " \"schema\": {\n" + + " \"fields\": {\n" + + " \"name\": \"string\",\n" + + " \"age\": \"int\",\n" + + " \"card\": \"int\"\n" + + " }\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"transform\": [\n" + + " ],\n" + + " \"sink\": [\n" + + " {\n" + + " \"plugin_name\": \"Console\",\n" + + " \"source_table_name\": [\"fake\"]\n" + + " }\n" + + " ]\n" + + "}"; + String parameters = "jobId=1&jobName=test&isStartWithSavePoint=false"; + // Only jobName is compared because jobId is randomly generated if isStartWithSavePoint is + // false + Response response = + given().body(requestBody) + .post( + HOST + + hazelcastInstance + .getCluster() + .getLocalMember() + .getAddress() + .getPort() + + RestConstant.SUBMIT_JOB_URL + + "?" 
+ + parameters); + + response.then().statusCode(200).body("jobName", equalTo("test")); + String jobId = response.getBody().jsonPath().getString("jobId"); + SeaTunnelServer seaTunnelServer = + (SeaTunnelServer) + hazelcastInstance + .node + .getNodeExtension() + .createExtensionServices() + .get(Constant.SEATUNNEL_SERVICE_NAME); + JobStatus jobStatus = + seaTunnelServer.getCoordinatorService().getJobStatus(Long.parseLong(jobId)); + Assertions.assertEquals(JobStatus.RUNNING, jobStatus); + Awaitility.await() + .atMost(2, TimeUnit.MINUTES) + .untilAsserted( + () -> + Assertions.assertEquals( + JobStatus.FINISHED, + seaTunnelServer + .getCoordinatorService() + .getJobStatus(Long.parseLong(jobId)))); + } + @AfterAll static void afterClass() { if (hazelcastInstance != null) { diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/batch_fakesource_to_console_error.conf b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/batch_fakesource_to_console_error.conf new file mode 100644 index 000000000000..5fb9b3b80b65 --- /dev/null +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/batch_fakesource_to_console_error.conf @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + job.mode = "BATCH" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + result_table_name = "fake" + schema { + fields { + id = "int" + name = "string" + age = "int" + } + } + } +} +transform { + sql { + source_table_name = "fake" + result_table_name = "fake1" + query ="select cast(name as int) as name, id,age from fake" + } +} +sink { + console { + source_table_name = "fake1" + } + +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/seatunnel.yaml b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/seatunnel.yaml index 16b9f55c30dc..7775a483cd74 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/seatunnel.yaml +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/seatunnel.yaml @@ -17,6 +17,7 @@ seatunnel: engine: + history-job-expire-minutes: 1 backup-count: 2 queue-type: blockingqueue print-execution-info-interval: 10 @@ -24,9 +25,7 @@ seatunnel: dynamic-slot: true checkpoint: interval: 300000 - timeout: 10000 - max-concurrent: 1 - tolerable-failure: 2 + timeout: 100000 storage: type: localfile max-retained: 3 diff --git a/seatunnel-engine/seatunnel-engine-client/src/main/java/org/apache/seatunnel/engine/client/job/JobExecutionEnvironment.java b/seatunnel-engine/seatunnel-engine-client/src/main/java/org/apache/seatunnel/engine/client/job/JobExecutionEnvironment.java index bf3169e4c803..3f870c612160 100644 --- a/seatunnel-engine/seatunnel-engine-client/src/main/java/org/apache/seatunnel/engine/client/job/JobExecutionEnvironment.java +++ 
b/seatunnel-engine/seatunnel-engine-client/src/main/java/org/apache/seatunnel/engine/client/job/JobExecutionEnvironment.java @@ -18,55 +18,19 @@ package org.apache.seatunnel.engine.client.job; import org.apache.seatunnel.api.common.JobContext; -import org.apache.seatunnel.api.env.EnvCommonOptions; -import org.apache.seatunnel.common.config.Common; -import org.apache.seatunnel.common.utils.FileUtils; import org.apache.seatunnel.engine.client.SeaTunnelHazelcastClient; import org.apache.seatunnel.engine.common.config.JobConfig; -import org.apache.seatunnel.engine.common.exception.SeaTunnelEngineException; -import org.apache.seatunnel.engine.common.utils.IdGenerator; -import org.apache.seatunnel.engine.core.dag.actions.Action; -import org.apache.seatunnel.engine.core.dag.logical.LogicalDag; -import org.apache.seatunnel.engine.core.dag.logical.LogicalDagGenerator; +import org.apache.seatunnel.engine.core.job.AbstractJobEnvironment; import org.apache.seatunnel.engine.core.job.JobImmutableInformation; import org.apache.seatunnel.engine.core.parse.MultipleTableJobConfigParser; -import org.apache.commons.lang3.tuple.ImmutablePair; - -import com.hazelcast.logging.ILogger; -import com.hazelcast.logging.Logger; - -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; import java.util.concurrent.ExecutionException; -import java.util.stream.Collectors; - -public class JobExecutionEnvironment { - - private static final ILogger LOGGER = Logger.getLogger(JobExecutionEnvironment.class); - - private final boolean isStartWithSavePoint; - - private final JobConfig jobConfig; - - private final List actions = new ArrayList<>(); - - private final Set jarUrls = new HashSet<>(); - private final List commonPluginJars = new ArrayList<>(); +public class 
JobExecutionEnvironment extends AbstractJobEnvironment { private final String jobFilePath; - private final IdGenerator idGenerator; - private final SeaTunnelHazelcastClient seaTunnelHazelcastClient; private final JobClient jobClient; @@ -78,35 +42,12 @@ public JobExecutionEnvironment( SeaTunnelHazelcastClient seaTunnelHazelcastClient, boolean isStartWithSavePoint, Long jobId) { - this.jobConfig = jobConfig; + super(jobConfig, isStartWithSavePoint); this.jobFilePath = jobFilePath; - this.idGenerator = new IdGenerator(); this.seaTunnelHazelcastClient = seaTunnelHazelcastClient; this.jobClient = new JobClient(seaTunnelHazelcastClient); - this.isStartWithSavePoint = isStartWithSavePoint; this.jobConfig.setJobContext( new JobContext(isStartWithSavePoint ? jobId : jobClient.getNewJobId())); - this.commonPluginJars.addAll(searchPluginJars()); - this.commonPluginJars.addAll( - new ArrayList<>( - Common.getThirdPartyJars( - jobConfig - .getEnvOptions() - .getOrDefault(EnvCommonOptions.JARS.key(), "") - .toString()) - .stream() - .map(Path::toUri) - .map( - uri -> { - try { - return uri.toURL(); - } catch (MalformedURLException e) { - throw new SeaTunnelEngineException( - "the uri of jar illegal:" + uri, e); - } - }) - .collect(Collectors.toList()))); - LOGGER.info("add common jar in plugins :" + commonPluginJars); } public JobExecutionEnvironment( @@ -117,27 +58,12 @@ public JobExecutionEnvironment( } /** Search all jars in SEATUNNEL_HOME/plugins */ - private Set searchPluginJars() { - try { - if (Files.exists(Common.pluginRootDir())) { - return new HashSet<>(FileUtils.searchJarFiles(Common.pluginRootDir())); - } - } catch (IOException | SeaTunnelEngineException e) { - LOGGER.warning( - String.format("Can't search plugin jars in %s.", Common.pluginRootDir()), e); - } - return Collections.emptySet(); - } - - private MultipleTableJobConfigParser getJobConfigParser() { + @Override + protected MultipleTableJobConfigParser getJobConfigParser() { return new 
MultipleTableJobConfigParser( jobFilePath, idGenerator, jobConfig, commonPluginJars, isStartWithSavePoint); } - private LogicalDagGenerator getLogicalDagGenerator() { - return new LogicalDagGenerator(actions, jobConfig, idGenerator); - } - public ClientJobProxy execute() throws ExecutionException, InterruptedException { JobImmutableInformation jobImmutableInformation = new JobImmutableInformation( @@ -150,11 +76,4 @@ public ClientJobProxy execute() throws ExecutionException, InterruptedException return jobClient.createJobProxy(jobImmutableInformation); } - - private LogicalDag getLogicalDag() { - ImmutablePair, Set> immutablePair = getJobConfigParser().parse(); - actions.addAll(immutablePair.getLeft()); - jarUrls.addAll(immutablePair.getRight()); - return getLogicalDagGenerator().generate(); - } } diff --git a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/MultipleTableJobConfigParserTest.java b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/MultipleTableJobConfigParserTest.java index a806d0a96289..e5faaea25c6f 100644 --- a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/MultipleTableJobConfigParserTest.java +++ b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/MultipleTableJobConfigParserTest.java @@ -37,7 +37,6 @@ public class MultipleTableJobConfigParserTest { - @SuppressWarnings("checkstyle:MagicNumber") @Test public void testSimpleJobParse() { Common.setDeployMode(DeployMode.CLIENT); @@ -58,7 +57,6 @@ public void testSimpleJobParse() { Assertions.assertEquals(3, actions.get(0).getParallelism()); } - @SuppressWarnings("checkstyle:MagicNumber") @Test public void testComplexJobParse() { Common.setDeployMode(DeployMode.CLIENT); diff --git a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java 
b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java index 85aec59c2769..ff39c4b2a5eb 100644 --- a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java +++ b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java @@ -56,7 +56,6 @@ import static org.apache.seatunnel.api.common.metrics.MetricNames.SOURCE_RECEIVED_QPS; import static org.awaitility.Awaitility.await; -@SuppressWarnings("checkstyle:MagicNumber") @DisabledOnOs(OS.WINDOWS) public class SeaTunnelClientTest { diff --git a/seatunnel-engine/seatunnel-engine-client/src/test/resources/seatunnel.yaml b/seatunnel-engine/seatunnel-engine-client/src/test/resources/seatunnel.yaml index ea5b5ac23070..4678cfed3d5a 100644 --- a/seatunnel-engine/seatunnel-engine-client/src/test/resources/seatunnel.yaml +++ b/seatunnel-engine/seatunnel-engine-client/src/test/resources/seatunnel.yaml @@ -24,8 +24,6 @@ seatunnel: checkpoint: interval: 6000 timeout: 7000 - max-concurrent: 1 - tolerable-failure: 2 storage: type: hdfs max-retained: 3 diff --git a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/EngineConfig.java b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/EngineConfig.java index edc18a0b15e1..847053a2d909 100644 --- a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/EngineConfig.java +++ b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/EngineConfig.java @@ -30,8 +30,8 @@ import static com.hazelcast.internal.util.Preconditions.checkPositive; @Data -@SuppressWarnings("checkstyle:MagicNumber") public class EngineConfig { + private int backupCount = ServerConfigOptions.BACKUP_COUNT.defaultValue(); private int printExecutionInfoInterval = 
ServerConfigOptions.PRINT_EXECUTION_INFO_INTERVAL.defaultValue(); @@ -50,6 +50,8 @@ public class EngineConfig { private CheckpointConfig checkpointConfig = ServerConfigOptions.CHECKPOINT.defaultValue(); private QueueType queueType = ServerConfigOptions.QUEUE_TYPE.defaultValue(); + private int historyJobExpireMinutes = + ServerConfigOptions.HISTORY_JOB_EXPIRE_MINUTES.defaultValue(); public void setBackupCount(int newBackupCount) { checkBackupCount(newBackupCount, 0); @@ -82,6 +84,13 @@ public void setTaskExecutionThreadShareMode(ThreadShareMode taskExecutionThreadS this.taskExecutionThreadShareMode = taskExecutionThreadShareMode; } + public void setHistoryJobExpireMinutes(int historyJobExpireMinutes) { + checkPositive( + historyJobExpireMinutes, + ServerConfigOptions.HISTORY_JOB_EXPIRE_MINUTES + " must be > 0"); + this.historyJobExpireMinutes = historyJobExpireMinutes; + } + public EngineConfig setQueueType(QueueType queueType) { checkNotNull(queueType); this.queueType = queueType; diff --git a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/YamlSeaTunnelDomConfigProcessor.java b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/YamlSeaTunnelDomConfigProcessor.java index 718e915a0c34..2010d1f4155f 100644 --- a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/YamlSeaTunnelDomConfigProcessor.java +++ b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/YamlSeaTunnelDomConfigProcessor.java @@ -131,6 +131,11 @@ private void parseEngineConfig(Node engineNode, SeaTunnelConfig config) { engineConfig.setSlotServiceConfig(parseSlotServiceConfig(node)); } else if (ServerConfigOptions.CHECKPOINT.key().equals(name)) { engineConfig.setCheckpointConfig(parseCheckpointConfig(node)); + } else if (ServerConfigOptions.HISTORY_JOB_EXPIRE_MINUTES.key().equals(name)) { + 
engineConfig.setHistoryJobExpireMinutes( + getIntegerValue( + ServerConfigOptions.HISTORY_JOB_EXPIRE_MINUTES.key(), + getTextContent(node))); } else { LOGGER.warning("Unrecognized element: " + name); } @@ -151,15 +156,10 @@ private CheckpointConfig parseCheckpointConfig(Node checkpointNode) { getIntegerValue( ServerConfigOptions.CHECKPOINT_TIMEOUT.key(), getTextContent(node))); - } else if (ServerConfigOptions.CHECKPOINT_MAX_CONCURRENT.key().equals(name)) { - checkpointConfig.setMaxConcurrentCheckpoints( - getIntegerValue( - ServerConfigOptions.CHECKPOINT_MAX_CONCURRENT.key(), - getTextContent(node))); - } else if (ServerConfigOptions.CHECKPOINT_TOLERABLE_FAILURE.key().equals(name)) { - checkpointConfig.setTolerableFailureCheckpoints( + } else if (ServerConfigOptions.SCHEMA_CHANGE_CHECKPOINT_TIMEOUT.key().equals(name)) { + checkpointConfig.setSchemaChangeCheckpointTimeout( getIntegerValue( - ServerConfigOptions.CHECKPOINT_TOLERABLE_FAILURE.key(), + ServerConfigOptions.SCHEMA_CHANGE_CHECKPOINT_TIMEOUT.key(), getTextContent(node))); } else if (ServerConfigOptions.CHECKPOINT_STORAGE.key().equals(name)) { checkpointConfig.setStorage(parseCheckpointStorageConfig(node)); diff --git a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/server/CheckpointConfig.java b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/server/CheckpointConfig.java index 78a5ff4d0558..78add9c8831e 100644 --- a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/server/CheckpointConfig.java +++ b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/server/CheckpointConfig.java @@ -21,20 +21,17 @@ import java.io.Serializable; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; @Data 
-@SuppressWarnings("checkstyle:MagicNumber") public class CheckpointConfig implements Serializable { public static final long MINIMAL_CHECKPOINT_TIME = 10; private long checkpointInterval = ServerConfigOptions.CHECKPOINT_INTERVAL.defaultValue(); private long checkpointTimeout = ServerConfigOptions.CHECKPOINT_TIMEOUT.defaultValue(); - private int maxConcurrentCheckpoints = - ServerConfigOptions.CHECKPOINT_MAX_CONCURRENT.defaultValue(); - private int tolerableFailureCheckpoints = - ServerConfigOptions.CHECKPOINT_TOLERABLE_FAILURE.defaultValue(); + private long schemaChangeCheckpointTimeout = + ServerConfigOptions.SCHEMA_CHANGE_CHECKPOINT_TIMEOUT.defaultValue(); private CheckpointStorageConfig storage = ServerConfigOptions.CHECKPOINT_STORAGE.defaultValue(); @@ -52,17 +49,10 @@ public void setCheckpointTimeout(long checkpointTimeout) { this.checkpointTimeout = checkpointTimeout; } - public void setMaxConcurrentCheckpoints(int maxConcurrentCheckpoints) { + public void setSchemaChangeCheckpointTimeout(long checkpointTimeout) { checkArgument( - maxConcurrentCheckpoints >= 1, - "The minimum number of concurrent checkpoints is 1."); - this.maxConcurrentCheckpoints = maxConcurrentCheckpoints; - } - - public void setTolerableFailureCheckpoints(int tolerableFailureCheckpoints) { - checkArgument( - maxConcurrentCheckpoints >= 0, - "The number of tolerance failed checkpoints must be a natural number."); - this.tolerableFailureCheckpoints = tolerableFailureCheckpoints; + checkpointTimeout >= MINIMAL_CHECKPOINT_TIME, + "The minimum checkpoint timeout is 10 ms."); + this.schemaChangeCheckpointTimeout = checkpointTimeout; } } diff --git a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/server/ServerConfigOptions.java b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/server/ServerConfigOptions.java index b5d02c03443c..6a0b8254057a 100644 --- 
a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/server/ServerConfigOptions.java +++ b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/config/server/ServerConfigOptions.java @@ -24,7 +24,6 @@ import java.util.Map; -@SuppressWarnings("checkstyle:MagicNumber") public class ServerConfigOptions { public static final Option BACKUP_COUNT = @@ -85,17 +84,12 @@ public class ServerConfigOptions { .defaultValue(30000) .withDescription("The timeout (in milliseconds) for a checkpoint."); - public static final Option CHECKPOINT_MAX_CONCURRENT = - Options.key("max-concurrent") + public static final Option SCHEMA_CHANGE_CHECKPOINT_TIMEOUT = + Options.key("schema-change-timeout") .intType() - .defaultValue(1) - .withDescription("The maximum number of concurrent checkpoints."); - - public static final Option CHECKPOINT_TOLERABLE_FAILURE = - Options.key("tolerable-failure") - .intType() - .defaultValue(0) - .withDescription("The tolerable failure number of a checkpoint."); + .defaultValue(30000) + .withDescription( + "The timeout (in milliseconds) for a schema change checkpoint."); public static final Option CHECKPOINT_STORAGE_TYPE = Options.key("type") @@ -138,4 +132,9 @@ public class ServerConfigOptions { .type(new TypeReference>() {}) .noDefaultValue() .withDescription("The checkpoint storage instance configuration."); + public static final Option HISTORY_JOB_EXPIRE_MINUTES = + Options.key("history-job-expire-minutes") + .intType() + .defaultValue(1440) + .withDescription("The expire time of history jobs.time unit minute"); } diff --git a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/serializeable/ConfigDataSerializerHook.java b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/serializeable/ConfigDataSerializerHook.java index 4867939d7406..7c7fa9261ff1 100644 --- 
a/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/serializeable/ConfigDataSerializerHook.java +++ b/seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/serializeable/ConfigDataSerializerHook.java @@ -46,7 +46,7 @@ public DataSerializableFactory createFactory() { } private static class Factory implements DataSerializableFactory { - @SuppressWarnings("checkstyle:returncount") + @Override public IdentifiedDataSerializable create(int typeId) { switch (typeId) { diff --git a/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml b/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml index e5d92281da7b..cc14d81eafa3 100644 --- a/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml +++ b/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml @@ -25,8 +25,6 @@ seatunnel: checkpoint: interval: 300000 timeout: 10000 - max-concurrent: 1 - tolerable-failure: 2 storage: type: hdfs max-retained: 3 diff --git a/seatunnel-engine/seatunnel-engine-common/src/test/java/org/apache/seatunnel/engine/common/config/YamlSeaTunnelConfigParserTest.java b/seatunnel-engine/seatunnel-engine-common/src/test/java/org/apache/seatunnel/engine/common/config/YamlSeaTunnelConfigParserTest.java index 4c199b352ef0..ed6853e39b4f 100644 --- a/seatunnel-engine/seatunnel-engine-common/src/test/java/org/apache/seatunnel/engine/common/config/YamlSeaTunnelConfigParserTest.java +++ b/seatunnel-engine/seatunnel-engine-common/src/test/java/org/apache/seatunnel/engine/common/config/YamlSeaTunnelConfigParserTest.java @@ -53,12 +53,6 @@ public void testSeaTunnelConfig() { Assertions.assertEquals( 7000, config.getEngineConfig().getCheckpointConfig().getCheckpointTimeout()); - Assertions.assertEquals( - 1, config.getEngineConfig().getCheckpointConfig().getMaxConcurrentCheckpoints()); - - Assertions.assertEquals( - 2, 
config.getEngineConfig().getCheckpointConfig().getTolerableFailureCheckpoints()); - Assertions.assertEquals( "hdfs", config.getEngineConfig().getCheckpointConfig().getStorage().getStorage()); diff --git a/seatunnel-engine/seatunnel-engine-common/src/test/resources/seatunnel.yaml b/seatunnel-engine/seatunnel-engine-common/src/test/resources/seatunnel.yaml index 4f6ce5f4ef1b..8453bdeecaaf 100644 --- a/seatunnel-engine/seatunnel-engine-common/src/test/resources/seatunnel.yaml +++ b/seatunnel-engine/seatunnel-engine-common/src/test/resources/seatunnel.yaml @@ -25,8 +25,6 @@ seatunnel: checkpoint: interval: 6000 timeout: 7000 - max-concurrent: 1 - tolerable-failure: 2 storage: type: hdfs max-retained: 3 diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/checkpoint/CheckpointType.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/checkpoint/CheckpointType.java index ab012ed87f7a..aa057a2e888e 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/checkpoint/CheckpointType.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/checkpoint/CheckpointType.java @@ -22,6 +22,12 @@ public enum CheckpointType { /** Automatically triggered by the CheckpointCoordinator. */ CHECKPOINT_TYPE(true, "checkpoint"), + /** Automatically triggered by the schema change. */ + SCHEMA_CHANGE_BEFORE_POINT_TYPE(true, "schema-change-before-point"), + + /** Automatically triggered by the schema change. */ + SCHEMA_CHANGE_AFTER_POINT_TYPE(true, "schema-change-after-point"), + /** Triggered by the user. 
*/ SAVEPOINT_TYPE(false, "savepoint"), @@ -52,4 +58,40 @@ public boolean isAuto() { public String getName() { return name; } + + public boolean isFinalCheckpoint() { + return this == COMPLETED_POINT_TYPE || this == SAVEPOINT_TYPE; + } + + public boolean isSchemaChangeCheckpoint() { + return isSchemaChangeBeforeCheckpoint() || isSchemaChangeAfterCheckpoint(); + } + + public boolean isSchemaChangeBeforeCheckpoint() { + return this == SCHEMA_CHANGE_BEFORE_POINT_TYPE; + } + + public boolean isSchemaChangeAfterCheckpoint() { + return this == SCHEMA_CHANGE_AFTER_POINT_TYPE; + } + + public boolean isSavepoint() { + return this == SAVEPOINT_TYPE; + } + + public boolean isGeneralCheckpoint() { + return this == CHECKPOINT_TYPE; + } + + public boolean notFinalCheckpoint() { + return isGeneralCheckpoint() || isSchemaChangeCheckpoint(); + } + + public boolean notSchemaChangeCheckpoint() { + return !isSchemaChangeCheckpoint(); + } + + public boolean notCompletedCheckpoint() { + return this != COMPLETED_POINT_TYPE; + } } diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/checkpoint/InternalCheckpointListener.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/checkpoint/InternalCheckpointListener.java index 6b6750be308e..137d5dbc5882 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/checkpoint/InternalCheckpointListener.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/checkpoint/InternalCheckpointListener.java @@ -41,4 +41,14 @@ default void notifyCheckpointComplete(long checkpointId) throws Exception {} */ @Override default void notifyCheckpointAborted(long checkpointId) throws Exception {} + + /** + * The notification that the checkpoint has ended means that the notifyCheckpointComplete method + * has been called for all tasks. + * + * @param checkpointId The ID of the checkpoint . 
+ * @throws Exception This method can propagate exceptions, which leads to a failure/recovery for + * the task or job. + */ + default void notifyCheckpointEnd(long checkpointId) throws Exception {} } diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/ShuffleMultipleRowStrategy.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/ShuffleMultipleRowStrategy.java index 37477189d79c..b5dcdf0534c6 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/ShuffleMultipleRowStrategy.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/ShuffleMultipleRowStrategy.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.engine.core.dag.actions; +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import org.apache.seatunnel.api.table.type.MultipleRowType; import org.apache.seatunnel.api.table.type.Record; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -73,7 +74,14 @@ public Map>> createShuffles( @Override public String createShuffleKey(Record record, int pipelineId, int inputIndex) { - String tableId = ((SeaTunnelRow) record.getData()).getTableId(); + String tableId; + if (record.getData() instanceof SeaTunnelRow) { + tableId = ((SeaTunnelRow) record.getData()).getTableId(); + } else if (record.getData() instanceof SchemaChangeEvent) { + tableId = ((SchemaChangeEvent) record.getData()).tablePath().toString(); + } else { + throw new UnsupportedOperationException("Unsupported record: " + record); + } return generateQueueName(pipelineId, inputIndex, tableId); } diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/ShufflePartitionStrategy.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/ShufflePartitionStrategy.java index 
4b69eba2271d..45144d210f3d 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/ShufflePartitionStrategy.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/ShufflePartitionStrategy.java @@ -35,7 +35,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; @Slf4j @SuperBuilder diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/SinkAction.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/SinkAction.java index d13087dcfca7..49a7bb69c83d 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/SinkAction.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/dag/actions/SinkAction.java @@ -26,7 +26,6 @@ import java.util.List; import java.util.Set; -@SuppressWarnings("checkstyle:ClassTypeParameterName") public class SinkAction extends AbstractAction { private final SeaTunnelSink sink; diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/AbstractJobEnvironment.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/AbstractJobEnvironment.java new file mode 100644 index 000000000000..3509903c0883 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/AbstractJobEnvironment.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.engine.core.job; + +import org.apache.seatunnel.api.env.EnvCommonOptions; +import org.apache.seatunnel.common.config.Common; +import org.apache.seatunnel.common.utils.FileUtils; +import org.apache.seatunnel.engine.common.config.JobConfig; +import org.apache.seatunnel.engine.common.exception.SeaTunnelEngineException; +import org.apache.seatunnel.engine.common.utils.IdGenerator; +import org.apache.seatunnel.engine.core.dag.actions.Action; +import org.apache.seatunnel.engine.core.dag.logical.LogicalDag; +import org.apache.seatunnel.engine.core.dag.logical.LogicalDagGenerator; +import org.apache.seatunnel.engine.core.parse.MultipleTableJobConfigParser; + +import org.apache.commons.lang3.tuple.ImmutablePair; + +import com.hazelcast.logging.ILogger; +import com.hazelcast.logging.Logger; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +public abstract class AbstractJobEnvironment { + protected static ILogger LOGGER = null; + + protected final boolean isStartWithSavePoint; + + protected final List actions = new ArrayList<>(); + protected final Set jarUrls = new HashSet<>(); + + protected 
final JobConfig jobConfig; + + protected final IdGenerator idGenerator; + + protected final List commonPluginJars = new ArrayList<>(); + + public AbstractJobEnvironment(JobConfig jobConfig, boolean isStartWithSavePoint) { + LOGGER = Logger.getLogger(getClass().getName()); + this.jobConfig = jobConfig; + this.isStartWithSavePoint = isStartWithSavePoint; + this.idGenerator = new IdGenerator(); + this.commonPluginJars.addAll(searchPluginJars()); + this.commonPluginJars.addAll( + new ArrayList<>( + Common.getThirdPartyJars( + jobConfig + .getEnvOptions() + .getOrDefault(EnvCommonOptions.JARS.key(), "") + .toString()) + .stream() + .map(Path::toUri) + .map( + uri -> { + try { + return uri.toURL(); + } catch (MalformedURLException e) { + throw new SeaTunnelEngineException( + "the uri of jar illegal:" + uri, e); + } + }) + .collect(Collectors.toList()))); + LOGGER.info("add common jar in plugins :" + commonPluginJars); + } + + protected Set searchPluginJars() { + try { + if (Files.exists(Common.pluginRootDir())) { + return new HashSet<>(FileUtils.searchJarFiles(Common.pluginRootDir())); + } + } catch (IOException | SeaTunnelEngineException e) { + LOGGER.warning( + String.format("Can't search plugin jars in %s.", Common.pluginRootDir()), e); + } + return Collections.emptySet(); + } + + protected abstract MultipleTableJobConfigParser getJobConfigParser(); + + protected LogicalDagGenerator getLogicalDagGenerator() { + return new LogicalDagGenerator(actions, jobConfig, idGenerator); + } + + protected LogicalDag getLogicalDag() { + ImmutablePair, Set> immutablePair = getJobConfigParser().parse(); + actions.addAll(immutablePair.getLeft()); + jarUrls.addAll(immutablePair.getRight()); + return getLogicalDagGenerator().generate(); + } +} diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/JobStatus.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/JobStatus.java index 
f9dbfb4c6cc3..7c50744dba0a 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/JobStatus.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/JobStatus.java @@ -60,7 +60,10 @@ public enum JobStatus { SUSPENDED(EndState.LOCALLY), /** The job is currently reconciling and waits for task execution report to recover state. */ - RECONCILING(EndState.NOT_END); + RECONCILING(EndState.NOT_END), + + /** Cannot find the JobID or the job status has already been cleared. */ + UNKNOWABLE(EndState.GLOBALLY); // -------------------------------------------------------------------------------------------- diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java index 86c0f3c94f59..ee2505286f82 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java @@ -130,6 +130,22 @@ public MultipleTableJobConfigParser( new JobConfigParser(idGenerator, commonPluginJars, isStartWithSavePoint); } + public MultipleTableJobConfigParser( + Config seaTunnelJobConfig, + IdGenerator idGenerator, + JobConfig jobConfig, + List commonPluginJars, + boolean isStartWithSavePoint) { + this.idGenerator = idGenerator; + this.jobConfig = jobConfig; + this.commonPluginJars = commonPluginJars; + this.isStartWithSavePoint = isStartWithSavePoint; + this.seaTunnelJobConfig = seaTunnelJobConfig; + this.envOptions = ReadonlyConfig.fromConfig(seaTunnelJobConfig.getConfig("env")); + this.fallbackParser = + new JobConfigParser(idGenerator, commonPluginJars, isStartWithSavePoint); + } + public ImmutablePair, 
Set> parse() { List sourceConfigs = TypesafeConfigUtils.getConfigList( diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/serializable/JobDataSerializerHook.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/serializable/JobDataSerializerHook.java index 18bdff612b46..29495085a8f7 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/serializable/JobDataSerializerHook.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/serializable/JobDataSerializerHook.java @@ -70,7 +70,7 @@ public DataSerializableFactory createFactory() { } private static class Factory implements DataSerializableFactory { - @SuppressWarnings("checkstyle:returncount") + @Override public IdentifiedDataSerializable create(int typeId) { switch (typeId) { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java index d2931d0c37ef..89a2258ce2dc 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java @@ -76,7 +76,6 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; -import static org.apache.seatunnel.api.common.metrics.MetricTags.JOB_ID; import static org.apache.seatunnel.engine.server.metrics.JobMetricsUtil.toJobMetricsMap; public class CoordinatorService { @@ -153,7 +152,6 @@ public class CoordinatorService { private final EngineConfig engineConfig; - @SuppressWarnings("checkstyle:MagicNumber") public CoordinatorService( @NonNull NodeEngineImpl nodeEngine, @NonNull SeaTunnelServer seaTunnelServer, @@ -220,7 +218,8 @@ 
private void initCoordinatorService() { .getMap(Constant.IMAP_FINISHED_JOB_METRICS), nodeEngine .getHazelcastInstance() - .getMap(Constant.IMAP_FINISHED_JOB_VERTEX_INFO)); + .getMap(Constant.IMAP_FINISHED_JOB_VERTEX_INFO), + engineConfig.getHistoryJobExpireMinutes()); List> collect = runningJobInfoIMap.entrySet().stream() @@ -391,7 +390,6 @@ private void checkNewActiveMaster() { } } - @SuppressWarnings("checkstyle:MagicNumber") private void clearCoordinatorService() { // interrupt all JobMaster runningJobMasterMap.values().forEach(JobMaster::interrupt); @@ -426,6 +424,18 @@ public ResourceManager getResourceManager() { /** call by client to submit job */ public PassiveCompletableFuture submitJob(long jobId, Data jobImmutableInformation) { CompletableFuture jobSubmitFuture = new CompletableFuture<>(); + + // Check if the current jobID is already running. If so, complete the submission + // successfully. + // This avoids potential issues like redundant job restores or other anomalies. + if (getJobMaster(jobId) != null) { + logger.warning( + String.format( + "The job %s is currently running; no need to submit again.", jobId)); + jobSubmitFuture.complete(null); + return new PassiveCompletableFuture<>(jobSubmitFuture); + } + JobMaster jobMaster = new JobMaster( jobImmutableInformation, @@ -495,10 +505,24 @@ public PassiveCompletableFuture savePoint(long jobId) { public PassiveCompletableFuture waitForJobComplete(long jobId) { JobMaster runningJobMaster = runningJobMasterMap.get(jobId); if (runningJobMaster == null) { - JobStatus jobStatus = jobHistoryService.getJobDetailState(jobId).getJobStatus(); + // Because operations on Imap cannot be performed within Operation. 
+ CompletableFuture jobStateFuture = + CompletableFuture.supplyAsync( + () -> { + return jobHistoryService.getJobDetailState(jobId); + }, + executorService); + JobHistoryService.JobState jobState = null; + try { + jobState = jobStateFuture.get(); + } catch (Exception e) { + throw new SeaTunnelEngineException("get job state error", e); + } + CompletableFuture future = new CompletableFuture<>(); - // TODO support history service record job execute error - future.complete(new JobResult(jobStatus, null)); + if (jobState == null) future.complete(new JobResult(JobStatus.UNKNOWABLE, null)); + else + future.complete(new JobResult(jobState.getJobStatus(), jobState.getErrorMessage())); return new PassiveCompletableFuture<>(future); } else { return new PassiveCompletableFuture<>(runningJobMaster.getJobMasterCompleteFuture()); @@ -526,7 +550,7 @@ public JobStatus getJobStatus(long jobId) { JobMaster runningJobMaster = runningJobMasterMap.get(jobId); if (runningJobMaster == null) { JobHistoryService.JobState jobDetailState = jobHistoryService.getJobDetailState(jobId); - return null == jobDetailState ? null : jobDetailState.getJobStatus(); + return null == jobDetailState ? 
JobStatus.UNKNOWABLE : jobDetailState.getJobStatus(); } return runningJobMaster.getJobStatus(); } @@ -567,17 +591,7 @@ public Map getRunningJobMetrics() { (RawJobMetrics) NodeEngineUtil.sendOperationToMemberNode( nodeEngine, - new GetMetricsOperation( - dis -> - (dis.tagValue(JOB_ID) - != null - && runningJobIds - .contains( - Long - .parseLong( - dis - .tagValue( - JOB_ID))))), + new GetMetricsOperation(runningJobIds), address) .get(); metrics.add(rawJobMetrics); diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/NodeExtension.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/NodeExtension.java index d4137955c8bf..37e00cffab2d 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/NodeExtension.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/NodeExtension.java @@ -21,6 +21,7 @@ import org.apache.seatunnel.engine.server.log.Log4j2HttpGetCommandProcessor; import org.apache.seatunnel.engine.server.log.Log4j2HttpPostCommandProcessor; import org.apache.seatunnel.engine.server.rest.RestHttpGetCommandProcessor; +import org.apache.seatunnel.engine.server.rest.RestHttpPostCommandProcessor; import com.hazelcast.cluster.ClusterState; import com.hazelcast.instance.impl.DefaultNodeExtension; @@ -79,6 +80,7 @@ public TextCommandService createTextCommandService() { register(HTTP_GET, new Log4j2HttpGetCommandProcessor(this)); register(HTTP_POST, new Log4j2HttpPostCommandProcessor(this)); register(HTTP_GET, new RestHttpGetCommandProcessor(this)); + register(HTTP_POST, new RestHttpPostCommandProcessor(this)); } }; } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelHealthMonitor.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelHealthMonitor.java index 
b7900871e5a2..2adf87aa4128 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelHealthMonitor.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelHealthMonitor.java @@ -75,7 +75,6 @@ private static String percentageString(double p) { return format("%.2f%%", p); } - @SuppressWarnings("checkstyle:magicnumber") private static String numberToUnit(long number) { for (int i = 6; i > 0; i--) { // 1024 is for 1024 kb is 1 MB etc @@ -282,7 +281,6 @@ private void renderSwap() { .append(", "); } - @SuppressWarnings("checkstyle:UnnecessaryParentheses") private void renderHeap() { sb.append("heap.memory.used=") .append(numberToUnit(runtimeUsedMemory.read())) diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java index f75f5af8e4d8..88ee1afc9ddf 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java @@ -90,7 +90,6 @@ public SlotService getSlotService() { return slotService; } - @SuppressWarnings("checkstyle:MagicNumber") @Override public void init(NodeEngine engine, Properties hzProperties) { this.nodeEngine = (NodeEngineImpl) engine; @@ -159,7 +158,6 @@ public LiveOperationRegistry getLiveOperationRegistry() { return liveOperationRegistry; } - @SuppressWarnings("checkstyle:MagicNumber") public CoordinatorService getCoordinatorService() { int retryCount = 0; if (isMasterNode()) { @@ -228,12 +226,11 @@ public boolean taskIsEnded(@NonNull TaskGroupLocation taskGroupLocation) { return taskState != null && ((ExecutionState) taskState).isEndState(); } - @SuppressWarnings("checkstyle:MagicNumber") public 
boolean isMasterNode() { // must retry until the cluster have master node try { return RetryUtils.retryWithException( - () -> nodeEngine.getMasterAddress().equals(nodeEngine.getThisAddress()), + () -> nodeEngine.getThisAddress().equals(nodeEngine.getMasterAddress()), new RetryUtils.RetryMaterial( Constant.OPERATION_RETRY_TIME, true, diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java index 8f4c40f9a561..009225347c5d 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java @@ -298,7 +298,6 @@ public PassiveCompletableFuture deployLocalTask( return deployLocalTask(taskGroup, Thread.currentThread().getContextClassLoader()); } - @SuppressWarnings("checkstyle:MagicNumber") public PassiveCompletableFuture deployLocalTask( @NonNull TaskGroup taskGroup, @NonNull ClassLoader classLoader) { CompletableFuture resultFuture = new CompletableFuture<>(); @@ -378,7 +377,6 @@ public PassiveCompletableFuture deployLocalTask( return new PassiveCompletableFuture<>(resultFuture); } - @SuppressWarnings("checkstyle:MagicNumber") private void notifyTaskStatusToMaster( TaskGroupLocation taskGroupLocation, TaskExecutionState taskExecutionState) { long sleepTime = 1000; @@ -605,10 +603,10 @@ public void run() { ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(classLoader); final Task t = tracker.task; + ProgressState result = null; try { startedLatch.countDown(); t.init(); - ProgressState result; do { result = t.call(); } while (!result.isDone() @@ -625,10 +623,12 @@ public void run() { taskGroupExecutionTracker.exception(e); } finally { 
taskGroupExecutionTracker.taskDone(t); - try { - tracker.task.close(); - } catch (IOException e) { - logger.severe("Close task error", e); + if (result == null || !result.isDone()) { + try { + tracker.task.close(); + } catch (IOException e) { + logger.severe("Close task error", e); + } } } Thread.currentThread().setContextClassLoader(oldClassLoader); @@ -662,7 +662,6 @@ public final class CooperativeTaskWorker implements Runnable { private Future thisTaskFuture; private BlockingQueue> futureBlockingQueue; - @SuppressWarnings("checkstyle:MagicNumber") public CooperativeTaskWorker( LinkedBlockingDeque taskqueue, RunBusWorkSupplier runBusWorkSupplier, diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointBarrier.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointBarrier.java index 3ebd672551b3..7179cc8cb354 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointBarrier.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointBarrier.java @@ -48,7 +48,7 @@ public boolean snapshot() { @Override public boolean prepareClose() { - return checkpointType != CheckpointType.CHECKPOINT_TYPE; + return checkpointType.isFinalCheckpoint(); } public long getTimestamp() { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCloseReason.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCloseReason.java index 9f35f62fd608..c07f10fb1c9c 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCloseReason.java +++ 
b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCloseReason.java @@ -20,7 +20,7 @@ public enum CheckpointCloseReason { PIPELINE_END("Pipeline turn to end state."), CHECKPOINT_EXPIRED( - "Checkpoint expired before completing. Please increase checkpoint timeout in the seatunnel.yaml"), + "Checkpoint expired before completing. Please increase checkpoint timeout in the seatunnel.yaml or jobConfig env."), CHECKPOINT_COORDINATOR_COMPLETED("CheckpointCoordinator completed."), CHECKPOINT_COORDINATOR_SHUTDOWN("CheckpointCoordinator shutdown."), CHECKPOINT_COORDINATOR_RESET("CheckpointCoordinator reset."), diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java index e6b3c3d27ac7..222f60a5cb50 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java @@ -32,6 +32,7 @@ import org.apache.seatunnel.engine.serializer.api.Serializer; import org.apache.seatunnel.engine.serializer.protobuf.ProtoStuffSerializer; import org.apache.seatunnel.engine.server.checkpoint.operation.CheckpointBarrierTriggerOperation; +import org.apache.seatunnel.engine.server.checkpoint.operation.CheckpointEndOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.CheckpointFinishedOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.NotifyTaskRestoreOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.NotifyTaskStartOperation; @@ -64,6 +65,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import 
java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; @@ -71,7 +73,6 @@ import static org.apache.seatunnel.engine.common.utils.ExceptionUtil.sneakyThrow; import static org.apache.seatunnel.engine.core.checkpoint.CheckpointType.CHECKPOINT_TYPE; -import static org.apache.seatunnel.engine.core.checkpoint.CheckpointType.COMPLETED_POINT_TYPE; import static org.apache.seatunnel.engine.core.checkpoint.CheckpointType.SAVEPOINT_TYPE; import static org.apache.seatunnel.engine.server.checkpoint.CheckpointPlan.COORDINATOR_INDEX; import static org.apache.seatunnel.engine.server.task.statemachine.SeaTunnelTaskState.READY_START; @@ -116,13 +117,14 @@ public class CheckpointCoordinator { private final CheckpointConfig coordinatorConfig; - private int tolerableFailureCheckpoints; private transient ScheduledExecutorService scheduler; private final AtomicLong latestTriggerTimestamp = new AtomicLong(0); private final AtomicInteger pendingCounter = new AtomicInteger(0); + private final AtomicBoolean schemaChanging = new AtomicBoolean(false); + private final Object lock = new Object(); /** Flag marking the coordinator as shut down (not accepting any messages anymore). 
*/ @@ -162,7 +164,6 @@ public CheckpointCoordinator( this.runningJobStateIMap = runningJobStateIMap; this.plan = plan; this.coordinatorConfig = checkpointConfig; - this.tolerableFailureCheckpoints = coordinatorConfig.getTolerableFailureCheckpoints(); this.pendingCheckpoints = new ConcurrentHashMap<>(); this.completedCheckpoints = new ArrayDeque<>(coordinatorConfig.getStorage().getMaxRetainedCheckpoints() + 1); @@ -183,19 +184,9 @@ public CheckpointCoordinator( this.checkpointIdCounter = checkpointIdCounter; this.readyToCloseStartingTask = new CopyOnWriteArraySet<>(); if (pipelineState != null) { - // fix after the savepoint job is restored, the checkpoint file cannot be generated - CompletedCheckpoint tmpCheckpoint = - serializer.deserialize(pipelineState.getStates(), CompletedCheckpoint.class); this.latestCompletedCheckpoint = - new CompletedCheckpoint( - tmpCheckpoint.getJobId(), - tmpCheckpoint.getPipelineId(), - tmpCheckpoint.getCheckpointId(), - tmpCheckpoint.getCheckpointTimestamp(), - CheckpointType.CHECKPOINT_TYPE, - tmpCheckpoint.getCompletedTimestamp(), - tmpCheckpoint.getTaskStates(), - tmpCheckpoint.getTaskStatistics()); + serializer.deserialize(pipelineState.getStates(), CompletedCheckpoint.class); + this.latestCompletedCheckpoint.setRestored(true); } this.checkpointCoordinatorFuture = new CompletableFuture(); @@ -326,8 +317,13 @@ private void notifyCompleted(CompletedCheckpoint completedCheckpoint) { try { LOG.info("start notify checkpoint completed, checkpoint:{}", completedCheckpoint); InvocationFuture[] invocationFutures = - notifyCheckpointCompleted(completedCheckpoint.getCheckpointId()); + notifyCheckpointCompleted(completedCheckpoint); CompletableFuture.allOf(invocationFutures).join(); + // Execution to this point means that all notifyCheckpointCompleted have been + // completed + InvocationFuture[] invocationFuturesForEnd = + notifyCheckpointEnd(completedCheckpoint); + CompletableFuture.allOf(invocationFuturesForEnd).join(); } catch (Throwable 
e) { handleCoordinatorError( "notify checkpoint completed failed", @@ -391,10 +387,9 @@ protected void tryTriggerPendingCheckpoint(CheckpointType checkpointType) { return; } final long currentTimestamp = Instant.now().toEpochMilli(); - if (notFinalCheckpoint(checkpointType)) { + if (checkpointType.notFinalCheckpoint() && checkpointType.notSchemaChangeCheckpoint()) { if (currentTimestamp - latestTriggerTimestamp.get() < coordinatorConfig.getCheckpointInterval() - || pendingCounter.get() >= coordinatorConfig.getMaxConcurrentCheckpoints() || !isAllTaskReady) { return; } @@ -411,27 +406,29 @@ protected void tryTriggerPendingCheckpoint(CheckpointType checkpointType) { shutdown)); return; } - if (!notFinalCheckpoint(checkpointType)) { + if (checkpointType.isFinalCheckpoint() || checkpointType.isSchemaChangeCheckpoint()) { if (pendingCounter.get() > 0) { scheduleTriggerPendingCheckpoint(checkpointType, 500L); return; } } + + if (schemaChanging.get() && checkpointType.isGeneralCheckpoint()) { + LOG.info("skip trigger generic-checkpoint because schema change in progress"); + return; + } + CompletableFuture pendingCheckpoint = createPendingCheckpoint(currentTimestamp, checkpointType); startTriggerPendingCheckpoint(pendingCheckpoint); pendingCounter.incrementAndGet(); // if checkpoint type are final type, we don't need to trigger next checkpoint - if (notFinalCheckpoint(checkpointType)) { + if (checkpointType.notFinalCheckpoint() && checkpointType.notSchemaChangeCheckpoint()) { scheduleTriggerPendingCheckpoint(coordinatorConfig.getCheckpointInterval()); } } } - private boolean notFinalCheckpoint(CheckpointType checkpointType) { - return checkpointType.equals(CHECKPOINT_TYPE); - } - public boolean isShutdown() { return shutdown; } @@ -519,22 +516,24 @@ private void startTriggerPendingCheckpoint( LOG.debug( "Start a scheduled task to prevent checkpoint timeouts for barrier " + pendingCheckpoint.getInfo()); + + long checkpointTimeout = coordinatorConfig.getCheckpointTimeout(); + 
if (pendingCheckpoint.getCheckpointType().isSchemaChangeAfterCheckpoint()) { + checkpointTimeout = coordinatorConfig.getSchemaChangeCheckpointTimeout(); + } + // TODO Need change to polling check until max timeout fails scheduler.schedule( () -> { // If any task is not acked within the checkpoint timeout if (pendingCheckpoints.get(pendingCheckpoint.getCheckpointId()) != null && !pendingCheckpoint.isFullyAcknowledged()) { - if (tolerableFailureCheckpoints-- <= 0) { - LOG.info( - "timeout checkpoint: " - + pendingCheckpoint.getInfo()); - handleCoordinatorError( - CheckpointCloseReason.CHECKPOINT_EXPIRED, null); - } + LOG.info("timeout checkpoint: " + pendingCheckpoint.getInfo()); + handleCoordinatorError( + CheckpointCloseReason.CHECKPOINT_EXPIRED, null); } }, - coordinatorConfig.getCheckpointTimeout(), + checkpointTimeout, TimeUnit.MILLISECONDS); }); } @@ -543,7 +542,7 @@ CompletableFuture createPendingCheckpoint( long triggerTimestamp, CheckpointType checkpointType) { synchronized (lock) { CompletableFuture idFuture; - if (!checkpointType.equals(COMPLETED_POINT_TYPE)) { + if (checkpointType.notCompletedCheckpoint()) { idFuture = CompletableFuture.supplyAsync( () -> { @@ -650,6 +649,7 @@ protected void cleanPendingCheckpoint(CheckpointCloseReason closedReason) { pipelineTaskStatus.clear(); readyToCloseStartingTask.clear(); pendingCounter.set(0); + schemaChanging.set(false); scheduler.shutdownNow(); scheduler = Executors.newScheduledThreadPool( @@ -679,7 +679,7 @@ protected void acknowledgeTask(TaskAcknowledgeOperation ackOperation) { pendingCheckpoint.acknowledgeTask( location, ackOperation.getStates(), - SAVEPOINT_TYPE == pendingCheckpoint.getCheckpointType() + pendingCheckpoint.getCheckpointType().isSavepoint() ? 
SubtaskStatus.SAVEPOINT_PREPARE_CLOSE : SubtaskStatus.RUNNING); } @@ -736,15 +736,9 @@ public synchronized void completePendingCheckpoint(CompletedCheckpoint completed notifyCompleted(completedCheckpoint); pendingCheckpoints.remove(checkpointId); pendingCounter.decrementAndGet(); - if (pendingCheckpoints.size() + 1 == coordinatorConfig.getMaxConcurrentCheckpoints()) { - // latest checkpoint completed time > checkpoint interval - if (notFinalCheckpoint(completedCheckpoint.getCheckpointType())) { - scheduleTriggerPendingCheckpoint(0L); - } - } if (isCompleted()) { cleanPendingCheckpoint(CheckpointCloseReason.CHECKPOINT_COORDINATOR_COMPLETED); - if (latestCompletedCheckpoint.getCheckpointType().equals(SAVEPOINT_TYPE)) { + if (latestCompletedCheckpoint.getCheckpointType().isSavepoint()) { updateStatus(CheckpointCoordinatorStatus.SUSPEND); checkpointCoordinatorFuture.complete( new CheckpointCoordinatorState(CheckpointCoordinatorStatus.SUSPEND, null)); @@ -756,28 +750,45 @@ public synchronized void completePendingCheckpoint(CompletedCheckpoint completed } } - public InvocationFuture[] notifyCheckpointCompleted(long checkpointId) { + public InvocationFuture[] notifyCheckpointCompleted(CompletedCheckpoint checkpoint) { + if (checkpoint.getCheckpointType().isSchemaChangeAfterCheckpoint()) { + completeSchemaChangeAfterCheckpoint(checkpoint); + } return plan.getPipelineSubtasks().stream() .map( taskLocation -> - new CheckpointFinishedOperation(taskLocation, checkpointId, true)) + new CheckpointFinishedOperation( + taskLocation, checkpoint.getCheckpointId(), true)) .map(checkpointManager::sendOperationToMemberNode) .toArray(InvocationFuture[]::new); } + public InvocationFuture[] notifyCheckpointEnd(CompletedCheckpoint checkpoint) { + if (checkpoint.getCheckpointType().isSchemaChangeCheckpoint()) { + return plan.getPipelineSubtasks().stream() + .map( + taskLocation -> + new CheckpointEndOperation( + taskLocation, checkpoint.getCheckpointId(), true)) + 
.map(checkpointManager::sendOperationToMemberNode) + .toArray(InvocationFuture[]::new); + } + return new InvocationFuture[0]; + } + public boolean isCompleted() { if (latestCompletedCheckpoint == null) { return false; } - return latestCompletedCheckpoint.getCheckpointType() == COMPLETED_POINT_TYPE - || latestCompletedCheckpoint.getCheckpointType() == SAVEPOINT_TYPE; + return latestCompletedCheckpoint.getCheckpointType().isFinalCheckpoint() + && !latestCompletedCheckpoint.isRestored(); } public boolean isEndOfSavePoint() { if (latestCompletedCheckpoint == null) { return false; } - return latestCompletedCheckpoint.getCheckpointType() == SAVEPOINT_TYPE; + return latestCompletedCheckpoint.getCheckpointType().isSavepoint(); } public PassiveCompletableFuture @@ -823,4 +834,53 @@ private synchronized void updateStatus(@NonNull CheckpointCoordinatorStatus targ checkpointStateImapKey, targetStatus)); } } + + protected void scheduleSchemaChangeBeforeCheckpoint() { + if (schemaChanging.compareAndSet(false, true)) { + LOG.info( + "stop trigger general-checkpoint({}@{}) because schema change in progress.", + pipelineId, + jobId); + LOG.info("schedule schema-change-before checkpoint({}@{}).", pipelineId, jobId); + scheduleTriggerPendingCheckpoint(CheckpointType.SCHEMA_CHANGE_BEFORE_POINT_TYPE, 0); + } else { + LOG.warn( + "schema-change-before checkpoint({}@{}) is already scheduled.", + pipelineId, + jobId); + } + } + + protected void scheduleSchemaChangeAfterCheckpoint() { + if (schemaChanging.get()) { + LOG.info("schedule schema-change-after checkpoint({}@{}).", pipelineId, jobId); + scheduleTriggerPendingCheckpoint(CheckpointType.SCHEMA_CHANGE_AFTER_POINT_TYPE, 0); + } else { + LOG.warn( + "schema-change-after checkpoint({}@{}) is already scheduled.", + pipelineId, + jobId); + } + } + + protected void completeSchemaChangeAfterCheckpoint(CompletedCheckpoint checkpoint) { + if (schemaChanging.compareAndSet(true, false)) { + LOG.info( + "completed schema-change-after 
checkpoint({}/{}@{}).", + checkpoint.getCheckpointId(), + pipelineId, + jobId); + LOG.info( + "recover trigger general-checkpoint({}/{}@{}).", + checkpoint.getCheckpointId(), + pipelineId, + jobId); + scheduleTriggerPendingCheckpoint(coordinatorConfig.getCheckpointInterval()); + } else { + throw new IllegalStateException( + String.format( + "schema-change-after checkpoint(%s/%s@%s) is already completed.", + checkpoint.getCheckpointId(), pipelineId, jobId)); + } + } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointManager.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointManager.java index 0c5a91698e7b..cd58da1dd9eb 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointManager.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointManager.java @@ -32,6 +32,8 @@ import org.apache.seatunnel.engine.core.job.PipelineStatus; import org.apache.seatunnel.engine.server.checkpoint.operation.TaskAcknowledgeOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.TaskReportStatusOperation; +import org.apache.seatunnel.engine.server.checkpoint.operation.TriggerSchemaChangeAfterCheckpointOperation; +import org.apache.seatunnel.engine.server.checkpoint.operation.TriggerSchemaChangeBeforeCheckpointOperation; import org.apache.seatunnel.engine.server.dag.execution.Pipeline; import org.apache.seatunnel.engine.server.dag.physical.SubPlan; import org.apache.seatunnel.engine.server.execution.Task; @@ -268,6 +270,38 @@ public void acknowledgeTask(TaskAcknowledgeOperation ackOperation) { coordinator.acknowledgeTask(ackOperation); } + public void triggerSchemaChangeBeforeCheckpoint( + TriggerSchemaChangeBeforeCheckpointOperation operation) { + log.debug( + "checkpoint manager received 
schema-change-before checkpoint operation {}", + operation.getTaskLocation()); + CheckpointCoordinator coordinator = getCheckpointCoordinator(operation.getTaskLocation()); + if (coordinator.isCompleted()) { + log.info( + "The checkpoint coordinator({}) is completed", + operation.getTaskLocation().getPipelineId()); + return; + } + + coordinator.scheduleSchemaChangeBeforeCheckpoint(); + } + + public void triggerSchemaChangeAfterCheckpoint( + TriggerSchemaChangeAfterCheckpointOperation operation) { + log.debug( + "checkpoint manager received schema-change-after checkpoint operation {}", + operation.getTaskLocation()); + CheckpointCoordinator coordinator = getCheckpointCoordinator(operation.getTaskLocation()); + if (coordinator.isCompleted()) { + log.info( + "The checkpoint coordinator({}) is completed", + operation.getTaskLocation().getPipelineId()); + return; + } + + coordinator.scheduleSchemaChangeAfterCheckpoint(); + } + public boolean isSavePointEnd() { return coordinatorMap.values().stream() .map(CheckpointCoordinator::isEndOfSavePoint) diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CompletedCheckpoint.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CompletedCheckpoint.java index 8d6ea554d7ee..3f196f2c8fff 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CompletedCheckpoint.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CompletedCheckpoint.java @@ -20,6 +20,9 @@ import org.apache.seatunnel.engine.core.checkpoint.Checkpoint; import org.apache.seatunnel.engine.core.checkpoint.CheckpointType; +import lombok.Getter; +import lombok.Setter; + import java.io.Serializable; import java.util.Map; @@ -41,6 +44,8 @@ public class CompletedCheckpoint implements Checkpoint, Serializable { private final Map taskStatistics; + 
@Getter @Setter private boolean isRestored = false; + public CompletedCheckpoint( long jobId, int pipelineId, diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/TaskStatistics.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/TaskStatistics.java index 03e55339d46f..3b00db5546bd 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/TaskStatistics.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/TaskStatistics.java @@ -21,8 +21,8 @@ import java.util.Arrays; import java.util.List; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; public class TaskStatistics implements Serializable { /** ID of the task the statistics belong to. */ diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/operation/CheckpointEndOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/operation/CheckpointEndOperation.java new file mode 100644 index 000000000000..62ec42f0a6a2 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/operation/CheckpointEndOperation.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.engine.server.checkpoint.operation; + +import org.apache.seatunnel.common.utils.ExceptionUtils; +import org.apache.seatunnel.common.utils.RetryUtils; +import org.apache.seatunnel.engine.common.Constant; +import org.apache.seatunnel.engine.common.exception.SeaTunnelEngineException; +import org.apache.seatunnel.engine.server.SeaTunnelServer; +import org.apache.seatunnel.engine.server.exception.TaskGroupContextNotFoundException; +import org.apache.seatunnel.engine.server.execution.Task; +import org.apache.seatunnel.engine.server.execution.TaskGroupContext; +import org.apache.seatunnel.engine.server.execution.TaskLocation; +import org.apache.seatunnel.engine.server.serializable.CheckpointDataSerializerHook; +import org.apache.seatunnel.engine.server.task.operation.TaskOperation; + +import com.hazelcast.nio.ObjectDataInput; +import com.hazelcast.nio.ObjectDataOutput; +import lombok.Getter; +import lombok.NoArgsConstructor; + +import java.io.IOException; + +@Getter +@NoArgsConstructor +public class CheckpointEndOperation extends TaskOperation { + + private long checkpointId; + + private boolean successful; + + public CheckpointEndOperation( + TaskLocation taskLocation, long checkpointId, boolean successful) { + super(taskLocation); + this.checkpointId = checkpointId; + this.successful = successful; + } + + @Override + public int getFactoryId() { + return 
CheckpointDataSerializerHook.FACTORY_ID; + } + + @Override + public int getClassId() { + return CheckpointDataSerializerHook.CHECKPOINT_END_OPERATOR; + } + + @Override + protected void writeInternal(ObjectDataOutput out) throws IOException { + super.writeInternal(out); + out.writeLong(checkpointId); + out.writeBoolean(successful); + } + + @Override + protected void readInternal(ObjectDataInput in) throws IOException { + super.readInternal(in); + checkpointId = in.readLong(); + successful = in.readBoolean(); + } + + @Override + public void run() throws Exception { + SeaTunnelServer server = getService(); + RetryUtils.retryWithException( + () -> { + try { + TaskGroupContext groupContext = + server.getTaskExecutionService() + .getExecutionContext(taskLocation.getTaskGroupLocation()); + Task task = groupContext.getTaskGroup().getTask(taskLocation.getTaskID()); + ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + Thread.currentThread().setContextClassLoader(groupContext.getClassLoader()); + + task.notifyCheckpointEnd(checkpointId); + + Thread.currentThread().setContextClassLoader(classLoader); + } catch (Exception e) { + throw new SeaTunnelEngineException(ExceptionUtils.getMessage(e)); + } + return null; + }, + new RetryUtils.RetryMaterial( + Constant.OPERATION_RETRY_TIME, + true, + exception -> + exception instanceof TaskGroupContextNotFoundException + && !server.taskIsEnded(taskLocation.getTaskGroupLocation()), + Constant.OPERATION_RETRY_SLEEP)); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/operation/TriggerSchemaChangeAfterCheckpointOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/operation/TriggerSchemaChangeAfterCheckpointOperation.java new file mode 100644 index 000000000000..66be8cc9f306 --- /dev/null +++ 
b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/operation/TriggerSchemaChangeAfterCheckpointOperation.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.engine.server.checkpoint.operation; + +import org.apache.seatunnel.engine.server.SeaTunnelServer; +import org.apache.seatunnel.engine.server.execution.TaskLocation; +import org.apache.seatunnel.engine.server.serializable.CheckpointDataSerializerHook; + +import com.hazelcast.nio.ObjectDataInput; +import com.hazelcast.nio.ObjectDataOutput; +import com.hazelcast.nio.serialization.IdentifiedDataSerializable; +import com.hazelcast.spi.impl.operationservice.Operation; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; + +@Slf4j +@Getter +@AllArgsConstructor +@NoArgsConstructor +public class TriggerSchemaChangeAfterCheckpointOperation extends Operation + implements IdentifiedDataSerializable { + + private TaskLocation taskLocation; + + @Override + public int getFactoryId() { + return CheckpointDataSerializerHook.FACTORY_ID; + } + + @Override + public int 
getClassId() { + return CheckpointDataSerializerHook.TRIGGER_SCHEMA_CHANGE_AFTER_CHECKPOINT_OPERATOR; + } + + @Override + protected void writeInternal(ObjectDataOutput out) throws IOException { + out.writeObject(taskLocation); + } + + @Override + protected void readInternal(ObjectDataInput in) throws IOException { + taskLocation = in.readObject(); + } + + @Override + public void run() { + log.debug("call TriggerSchemaChangeAfterCheckpointOperation start {}", taskLocation); + ((SeaTunnelServer) getService()) + .getCoordinatorService() + .getJobMaster(taskLocation.getJobId()) + .getCheckpointManager() + .triggerSchemaChangeAfterCheckpoint(this); + log.debug("call TriggerSchemaChangeAfterCheckpointOperation finished {}", taskLocation); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/operation/TriggerSchemaChangeBeforeCheckpointOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/operation/TriggerSchemaChangeBeforeCheckpointOperation.java new file mode 100644 index 000000000000..54daedd8c627 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/operation/TriggerSchemaChangeBeforeCheckpointOperation.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.engine.server.checkpoint.operation; + +import org.apache.seatunnel.engine.server.SeaTunnelServer; +import org.apache.seatunnel.engine.server.execution.TaskLocation; +import org.apache.seatunnel.engine.server.serializable.CheckpointDataSerializerHook; + +import com.hazelcast.nio.ObjectDataInput; +import com.hazelcast.nio.ObjectDataOutput; +import com.hazelcast.nio.serialization.IdentifiedDataSerializable; +import com.hazelcast.spi.impl.operationservice.Operation; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; + +@Slf4j +@Getter +@AllArgsConstructor +@NoArgsConstructor +public class TriggerSchemaChangeBeforeCheckpointOperation extends Operation + implements IdentifiedDataSerializable { + + private TaskLocation taskLocation; + + @Override + public int getFactoryId() { + return CheckpointDataSerializerHook.FACTORY_ID; + } + + @Override + public int getClassId() { + return CheckpointDataSerializerHook.TRIGGER_SCHEMA_CHANGE_BEFORE_CHECKPOINT_OPERATOR; + } + + @Override + protected void writeInternal(ObjectDataOutput out) throws IOException { + out.writeObject(taskLocation); + } + + @Override + protected void readInternal(ObjectDataInput in) throws IOException { + taskLocation = in.readObject(); + } + + @Override + public void run() { + log.debug("call TriggerSchemaChangeBeforeCheckpointOperation {}", taskLocation); + ((SeaTunnelServer) getService()) + .getCoordinatorService() + 
.getJobMaster(taskLocation.getJobId()) + .getCheckpointManager() + .triggerSchemaChangeBeforeCheckpoint(this); + log.debug("call SchemaChangeBeforeCheckpoint finished {}", taskLocation); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/execution/ExecutionPlanGenerator.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/execution/ExecutionPlanGenerator.java index de7c1fcb25d1..d7beaf3a32f6 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/execution/ExecutionPlanGenerator.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/execution/ExecutionPlanGenerator.java @@ -54,7 +54,7 @@ import java.util.Optional; import java.util.Set; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; @Slf4j public class ExecutionPlanGenerator { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/execution/PipelineGenerator.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/execution/PipelineGenerator.java index 17bd2509a8b3..f0a92a66914e 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/execution/PipelineGenerator.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/execution/PipelineGenerator.java @@ -29,7 +29,7 @@ import java.util.Map; import java.util.stream.Collectors; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; public class PipelineGenerator { /** The action & vertex ID needs to be regenerated because of split pipeline. 
*/ diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalPlanGenerator.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalPlanGenerator.java index 69d72d7130a9..c6173cf06479 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalPlanGenerator.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalPlanGenerator.java @@ -552,7 +552,10 @@ private List getSourceTask( .getJobId(), taskLocation, finalParallelismIndex, - f); + (PhysicalExecutionFlow< + SourceAction, + SourceConfig>) + f); } else { return new TransformSeaTunnelTask( jobImmutableInformation @@ -734,7 +737,6 @@ private static boolean sourceWithSink(PhysicalExecutionFlow flow) { .contains(true); } - @SuppressWarnings("checkstyle:MagicNumber") private long mixIDPrefixAndIndex(long idPrefix, int index) { return idPrefix * 10000 + index; } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalVertex.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalVertex.java index 3c840a269ad0..85cc31850bfd 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalVertex.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalVertex.java @@ -295,7 +295,6 @@ private TaskDeployState deployOnRemote(@NonNull SlotProfile slotProfile) { }); } - @SuppressWarnings("checkstyle:MagicNumber") // This method must not throw an exception public TaskDeployState deploy(@NonNull SlotProfile slotProfile) { try { @@ -479,7 +478,6 @@ public void cancel() { } } - @SuppressWarnings("checkstyle:MagicNumber") private 
void noticeTaskExecutionServiceCancel() { // Check whether the node exists, and whether the Task on the node exists. If there is no // direct update state diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PipelineLocation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PipelineLocation.java index 45609e5cef0e..c7a2c3caaece 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PipelineLocation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PipelineLocation.java @@ -25,6 +25,7 @@ @AllArgsConstructor @Data public class PipelineLocation implements Serializable { + private static final long serialVersionUID = 2510281765212372549L; private long jobId; private int pipelineId; } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/execution/TaskGroupLocation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/execution/TaskGroupLocation.java index 83686745a8e0..6dc7cadad6ff 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/execution/TaskGroupLocation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/execution/TaskGroupLocation.java @@ -30,6 +30,7 @@ @Data @AllArgsConstructor public class TaskGroupLocation implements Serializable { + private static final long serialVersionUID = -8321526709920799751L; private final long jobId; private final int pipelineId; diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/execution/TaskLocation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/execution/TaskLocation.java index 
210a3802c33f..00ee084145bb 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/execution/TaskLocation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/execution/TaskLocation.java @@ -43,7 +43,6 @@ public TaskLocation(TaskGroupLocation taskGroupLocation, long idPrefix, int inde this.index = index; } - @SuppressWarnings("checkstyle:MagicNumber") private long mixIDPrefixAndIndex(long idPrefix, int index) { return idPrefix * 10000 + index; } @@ -64,7 +63,6 @@ public long getTaskID() { return taskID; } - @SuppressWarnings("checkstyle:MagicNumber") public long getTaskVertexId() { return taskID / 10000; } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/job/JobImmutableInformationEnv.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/job/JobImmutableInformationEnv.java new file mode 100644 index 000000000000..4dd72e31cb8c --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/job/JobImmutableInformationEnv.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.engine.server.job; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import org.apache.seatunnel.api.common.JobContext; +import org.apache.seatunnel.engine.common.Constant; +import org.apache.seatunnel.engine.common.config.JobConfig; +import org.apache.seatunnel.engine.core.job.AbstractJobEnvironment; +import org.apache.seatunnel.engine.core.job.JobImmutableInformation; +import org.apache.seatunnel.engine.core.parse.MultipleTableJobConfigParser; + +import com.hazelcast.instance.impl.Node; +import com.hazelcast.spi.impl.NodeEngineImpl; + +import java.util.ArrayList; + +public class JobImmutableInformationEnv extends AbstractJobEnvironment { + private final Config seaTunnelJobConfig; + + private final NodeEngineImpl nodeEngine; + + private final Long jobId; + + public JobImmutableInformationEnv( + JobConfig jobConfig, + Config seaTunnelJobConfig, + Node node, + boolean isStartWithSavePoint, + Long jobId) { + super(jobConfig, isStartWithSavePoint); + this.seaTunnelJobConfig = seaTunnelJobConfig; + this.nodeEngine = node.getNodeEngine(); + this.jobConfig.setJobContext( + new JobContext( + isStartWithSavePoint + ? 
jobId + : nodeEngine + .getHazelcastInstance() + .getFlakeIdGenerator(Constant.SEATUNNEL_ID_GENERATOR_NAME) + .newId())); + this.jobId = Long.valueOf(jobConfig.getJobContext().getJobId()); + } + + public Long getJobId() { + return jobId; + } + + @Override + protected MultipleTableJobConfigParser getJobConfigParser() { + return new MultipleTableJobConfigParser( + seaTunnelJobConfig, idGenerator, jobConfig, commonPluginJars, isStartWithSavePoint); + } + + public JobImmutableInformation build() { + return new JobImmutableInformation( + Long.parseLong(jobConfig.getJobContext().getJobId()), + jobConfig.getName(), + isStartWithSavePoint, + nodeEngine.getSerializationService().toData(getLogicalDag()), + jobConfig, + new ArrayList<>(jarUrls)); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobHistoryService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobHistoryService.java index dda9a2d0f3fc..73474ad77617 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobHistoryService.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobHistoryService.java @@ -76,20 +76,22 @@ public class JobHistoryService { * finishedJobStateImap key is jobId and value is jobState(json) JobStateData Indicates the * status of the job, pipeline, and task */ - // TODO need to limit the amount of storage private final IMap finishedJobStateImap; private final IMap finishedJobMetricsImap; private final ObjectMapper objectMapper; + private final int finishedJobExpireTime; + public JobHistoryService( IMap runningJobStateIMap, ILogger logger, Map runningJobMasterMap, IMap finishedJobStateImap, IMap finishedJobMetricsImap, - IMap finishedJobVertexInfoImap) { + IMap finishedJobVertexInfoImap, + int finishedJobExpireTime) { this.runningJobStateIMap = 
runningJobStateIMap; this.logger = logger; this.runningJobMasterMap = runningJobMasterMap; @@ -98,6 +100,7 @@ public JobHistoryService( this.finishedJobDAGInfoImap = finishedJobVertexInfoImap; this.objectMapper = new ObjectMapper(); this.objectMapper.configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false); + this.finishedJobExpireTime = finishedJobExpireTime; } // Gets the status of a running and completed job @@ -164,18 +167,17 @@ public String getJobDetailStateAsString(Long jobId) { return objectNode.toString(); } - @SuppressWarnings("checkstyle:MagicNumber") public void storeFinishedJobState(JobMaster jobMaster) { JobState jobState = toJobStateMapper(jobMaster, false); jobState.setFinishTime(System.currentTimeMillis()); - finishedJobStateImap.put(jobState.jobId, jobState, 14, TimeUnit.DAYS); + jobState.setErrorMessage(jobMaster.getErrorMessage()); + finishedJobStateImap.put(jobState.jobId, jobState, finishedJobExpireTime, TimeUnit.MINUTES); } - @SuppressWarnings("checkstyle:MagicNumber") public void storeFinishedPipelineMetrics(long jobId, JobMetrics metrics) { finishedJobMetricsImap.computeIfAbsent(jobId, key -> JobMetrics.of(new HashMap<>())); JobMetrics newMetrics = finishedJobMetricsImap.get(jobId).merge(metrics); - finishedJobMetricsImap.put(jobId, newMetrics, 14, TimeUnit.DAYS); + finishedJobMetricsImap.put(jobId, newMetrics, finishedJobExpireTime, TimeUnit.MINUTES); } private JobState toJobStateMapper(JobMaster jobMaster, boolean simple) { @@ -230,27 +232,31 @@ private JobState toJobStateMapper(JobMaster jobMaster, boolean simple) { JobStatus jobStatus = (JobStatus) runningJobStateIMap.get(jobId); String jobName = jobMaster.getJobImmutableInformation().getJobName(); long submitTime = jobMaster.getJobImmutableInformation().getCreateTime(); - return new JobState(jobId, jobName, jobStatus, submitTime, null, pipelineStateMapperMap); + return new JobState( + jobId, jobName, jobStatus, submitTime, null, pipelineStateMapperMap, null); } public void 
storeJobInfo(long jobId, JobDAGInfo jobInfo) { - finishedJobDAGInfoImap.put(jobId, jobInfo); + finishedJobDAGInfoImap.put(jobId, jobInfo, finishedJobExpireTime, TimeUnit.MINUTES); } @AllArgsConstructor @Data public static final class JobState implements Serializable { + private static final long serialVersionUID = -1176348098833918960L; private Long jobId; private String jobName; private JobStatus jobStatus; private long submitTime; private Long finishTime; private Map pipelineStateMapperMap; + private String errorMessage; } @AllArgsConstructor @Data public static final class PipelineStateData implements Serializable { + private static final long serialVersionUID = -7875004875757861958L; private PipelineStatus pipelineStatus; private Map executionStateMap; } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java index e14d946c8117..6246831843ed 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java @@ -147,6 +147,12 @@ public class JobMaster { private CheckpointConfig jobCheckpointConfig; + public String getErrorMessage() { + return errorMessage; + } + + private String errorMessage; + public JobMaster( @NonNull Data jobImmutableInformationData, @NonNull NodeEngine nodeEngine, @@ -258,10 +264,6 @@ private CheckpointConfig createJobCheckpointConfig( CheckpointConfig jobCheckpointConfig = new CheckpointConfig(); jobCheckpointConfig.setCheckpointTimeout(defaultCheckpointConfig.getCheckpointTimeout()); jobCheckpointConfig.setCheckpointInterval(defaultCheckpointConfig.getCheckpointInterval()); - jobCheckpointConfig.setMaxConcurrentCheckpoints( - defaultCheckpointConfig.getMaxConcurrentCheckpoints()); - 
jobCheckpointConfig.setTolerableFailureCheckpoints( - defaultCheckpointConfig.getTolerableFailureCheckpoints()); CheckpointStorageConfig jobCheckpointStorageConfig = new CheckpointStorageConfig(); jobCheckpointStorageConfig.setStorage(defaultCheckpointConfig.getStorage().getStorage()); @@ -276,6 +278,11 @@ private CheckpointConfig createJobCheckpointConfig( Long.parseLong( jobEnv.get(EnvCommonOptions.CHECKPOINT_INTERVAL.key()).toString())); } + if (jobEnv.containsKey(EnvCommonOptions.CHECKPOINT_TIMEOUT.key())) { + jobCheckpointConfig.setCheckpointTimeout( + Long.parseLong( + jobEnv.get(EnvCommonOptions.CHECKPOINT_TIMEOUT.key()).toString())); + } return jobCheckpointConfig; } @@ -290,6 +297,7 @@ public void initStateFuture() { if (JobStatus.FAILING.equals(v.getStatus())) { physicalPlan.updateJobState(JobStatus.FAILING, JobStatus.FAILED); } + JobMaster.this.errorMessage = v.getError(); JobResult jobResult = new JobResult(physicalPlan.getJobStatus(), v.getError()); cleanJob(); @@ -297,7 +305,6 @@ public void initStateFuture() { })); } - @SuppressWarnings("checkstyle:MagicNumber") public void run() { try { if (!restore) { @@ -666,7 +673,6 @@ public Map getOwnedSlotProfiles( return ownedSlotProfilesIMap.get(pipelineLocation); } - @SuppressWarnings("checkstyle:MagicNumber") public void setOwnedSlotProfiles( @NonNull PipelineLocation pipelineLocation, @NonNull Map pipelineOwnedSlotProfiles) { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/persistence/FileMapStore.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/persistence/FileMapStore.java index ed47402e852d..d81f14978e64 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/persistence/FileMapStore.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/persistence/FileMapStore.java @@ -80,7 +80,6 @@ public Object load(Object 
key) { return null; } - @SuppressWarnings("checkstyle:MagicNumber") @SneakyThrows @Override public Map loadAll(Collection keys) { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/resource/ResourceProfile.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/resource/ResourceProfile.java index bc0734028be7..247c1940d8e0 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/resource/ResourceProfile.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/resource/ResourceProfile.java @@ -19,7 +19,7 @@ import java.io.Serializable; -import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; public class ResourceProfile implements Serializable { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestConstant.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestConstant.java index 0a5d8437be36..7776d592b8f6 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestConstant.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestConstant.java @@ -21,6 +21,7 @@ public class RestConstant { public static final String RUNNING_JOBS_URL = "/hazelcast/rest/maps/running-jobs"; public static final String RUNNING_JOB_URL = "/hazelcast/rest/maps/running-job"; + public static final String SUBMIT_JOB_URL = "/hazelcast/rest/maps/submit-job"; public static final String SYSTEM_MONITORING_INFORMATION = "/hazelcast/rest/maps/system-monitoring-information"; diff --git 
a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java index 6c71ac2feb37..4c1debd6f87b 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java @@ -243,7 +243,7 @@ private JsonObject convertToJson(JobInfo jobInfo, long jobId) { JobStatus jobStatus = getSeaTunnelServer().getCoordinatorService().getJobStatus(jobId); jobInfoJson - .add("jobId", jobId) + .add("jobId", String.valueOf(jobId)) .add("jobName", logicalDag.getJobConfig().getName()) .add("jobStatus", jobStatus.toString()) .add("envOptions", JsonUtil.toJsonObject(logicalDag.getJobConfig().getEnvOptions())) diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpPostCommandProcessor.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpPostCommandProcessor.java new file mode 100644 index 000000000000..e0edd9320320 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpPostCommandProcessor.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.engine.server.rest; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import org.apache.seatunnel.engine.common.Constant; +import org.apache.seatunnel.engine.common.config.JobConfig; +import org.apache.seatunnel.engine.common.utils.PassiveCompletableFuture; +import org.apache.seatunnel.engine.core.job.JobImmutableInformation; +import org.apache.seatunnel.engine.server.CoordinatorService; +import org.apache.seatunnel.engine.server.SeaTunnelServer; +import org.apache.seatunnel.engine.server.job.JobImmutableInformationEnv; +import org.apache.seatunnel.engine.server.log.Log4j2HttpPostCommandProcessor; +import org.apache.seatunnel.engine.server.utils.RestUtil; + +import com.hazelcast.internal.ascii.TextCommandService; +import com.hazelcast.internal.ascii.rest.HttpCommandProcessor; +import com.hazelcast.internal.ascii.rest.HttpPostCommand; +import com.hazelcast.internal.json.JsonObject; +import com.hazelcast.internal.serialization.Data; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static com.hazelcast.internal.ascii.rest.HttpStatusCode.SC_400; +import static com.hazelcast.internal.ascii.rest.HttpStatusCode.SC_500; +import static org.apache.seatunnel.engine.server.rest.RestConstant.SUBMIT_JOB_URL; + +public class RestHttpPostCommandProcessor extends HttpCommandProcessor { + private final Log4j2HttpPostCommandProcessor original; + + public RestHttpPostCommandProcessor(TextCommandService 
textCommandService) { + this(textCommandService, new Log4j2HttpPostCommandProcessor(textCommandService)); + } + + protected RestHttpPostCommandProcessor( + TextCommandService textCommandService, + Log4j2HttpPostCommandProcessor log4j2HttpPostCommandProcessor) { + super( + textCommandService, + textCommandService.getNode().getLogger(Log4j2HttpPostCommandProcessor.class)); + this.original = log4j2HttpPostCommandProcessor; + } + + @Override + public void handle(HttpPostCommand httpPostCommand) { + String uri = httpPostCommand.getURI(); + try { + if (uri.startsWith(SUBMIT_JOB_URL)) { + handleSubmitJob(httpPostCommand, uri); + } else { + original.handle(httpPostCommand); + } + } catch (IllegalArgumentException e) { + prepareResponse(SC_400, httpPostCommand, exceptionResponse(e)); + } catch (Throwable e) { + logger.warning("An error occurred while handling request " + httpPostCommand, e); + prepareResponse(SC_500, httpPostCommand, exceptionResponse(e)); + } + + this.textCommandService.sendResponse(httpPostCommand); + } + + private SeaTunnelServer getSeaTunnelServer() { + Map extensionServices = + this.textCommandService.getNode().getNodeExtension().createExtensionServices(); + return (SeaTunnelServer) extensionServices.get(Constant.SEATUNNEL_SERVICE_NAME); + } + + private void handleSubmitJob(HttpPostCommand httpPostCommand, String uri) + throws IllegalArgumentException { + Map requestParams = new HashMap<>(); + RestUtil.buildRequestParams(requestParams, uri); + byte[] requestBody = httpPostCommand.getData(); + if (requestBody.length == 0) { + throw new IllegalArgumentException("Request body is empty."); + } + JsonNode requestBodyJsonNode; + try { + requestBodyJsonNode = RestUtil.convertByteToJsonNode(requestBody); + } catch (IOException e) { + throw new IllegalArgumentException("Invalid JSON format in request body."); + } + Config config = RestUtil.buildConfig(requestBodyJsonNode); + JobConfig jobConfig = new JobConfig(); + 
jobConfig.setName(requestParams.get("jobName")); + JobImmutableInformationEnv jobImmutableInformationEnv = + new JobImmutableInformationEnv( + jobConfig, + config, + textCommandService.getNode(), + Boolean.parseBoolean(requestParams.get("isStartWithSavePoint")), + Long.parseLong(requestParams.get("jobId"))); + JobImmutableInformation jobImmutableInformation = jobImmutableInformationEnv.build(); + CoordinatorService coordinatorService = getSeaTunnelServer().getCoordinatorService(); + Data data = + textCommandService + .getNode() + .nodeEngine + .getSerializationService() + .toData(jobImmutableInformation); + PassiveCompletableFuture voidPassiveCompletableFuture = + coordinatorService.submitJob( + Long.parseLong(jobConfig.getJobContext().getJobId()), data); + voidPassiveCompletableFuture.join(); + + Long jobId = jobImmutableInformationEnv.getJobId(); + this.prepareResponse( + httpPostCommand, + new JsonObject().add("jobId", jobId).add("jobName", requestParams.get("jobName"))); + } + + @Override + public void handleRejection(HttpPostCommand httpPostCommand) { + handle(httpPostCommand); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/serializable/CheckpointDataSerializerHook.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/serializable/CheckpointDataSerializerHook.java index 3349a107035b..3f5ce72cb93e 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/serializable/CheckpointDataSerializerHook.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/serializable/CheckpointDataSerializerHook.java @@ -19,12 +19,15 @@ import org.apache.seatunnel.engine.common.serializeable.SeaTunnelFactoryIdConstant; import org.apache.seatunnel.engine.server.checkpoint.operation.CheckpointBarrierTriggerOperation; +import 
org.apache.seatunnel.engine.server.checkpoint.operation.CheckpointEndOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.CheckpointErrorReportOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.CheckpointFinishedOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.NotifyTaskRestoreOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.NotifyTaskStartOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.TaskAcknowledgeOperation; import org.apache.seatunnel.engine.server.checkpoint.operation.TaskReportStatusOperation; +import org.apache.seatunnel.engine.server.checkpoint.operation.TriggerSchemaChangeAfterCheckpointOperation; +import org.apache.seatunnel.engine.server.checkpoint.operation.TriggerSchemaChangeBeforeCheckpointOperation; import com.hazelcast.internal.serialization.DataSerializerHook; import com.hazelcast.internal.serialization.impl.FactoryIdHelper; @@ -41,8 +44,11 @@ public final class CheckpointDataSerializerHook implements DataSerializerHook { public static final int NOTIFY_TASK_RESTORE_OPERATOR = 5; public static final int NOTIFY_TASK_START_OPERATOR = 6; - public static final int CHECKPOINT_ERROR_REPORT_OPERATOR = 7; + public static final int TRIGGER_SCHEMA_CHANGE_BEFORE_CHECKPOINT_OPERATOR = 8; + public static final int TRIGGER_SCHEMA_CHANGE_AFTER_CHECKPOINT_OPERATOR = 9; + + public static final int CHECKPOINT_END_OPERATOR = 10; public static final int FACTORY_ID = FactoryIdHelper.getFactoryId( @@ -60,7 +66,7 @@ public DataSerializableFactory createFactory() { } private static class Factory implements DataSerializableFactory { - @SuppressWarnings("checkstyle:returncount") + @Override public IdentifiedDataSerializable create(int typeId) { switch (typeId) { @@ -78,6 +84,12 @@ public IdentifiedDataSerializable create(int typeId) { return new NotifyTaskStartOperation(); case CHECKPOINT_ERROR_REPORT_OPERATOR: return new CheckpointErrorReportOperation(); 
+ case TRIGGER_SCHEMA_CHANGE_BEFORE_CHECKPOINT_OPERATOR: + return new TriggerSchemaChangeBeforeCheckpointOperation(); + case TRIGGER_SCHEMA_CHANGE_AFTER_CHECKPOINT_OPERATOR: + return new TriggerSchemaChangeAfterCheckpointOperation(); + case CHECKPOINT_END_OPERATOR: + return new CheckpointEndOperation(); default: throw new IllegalArgumentException("Unknown type id " + typeId); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/serializable/ClientToServerOperationDataSerializerHook.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/serializable/ClientToServerOperationDataSerializerHook.java index 188e4fe0657b..78cf18c06bda 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/serializable/ClientToServerOperationDataSerializerHook.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/serializable/ClientToServerOperationDataSerializerHook.java @@ -79,7 +79,6 @@ public DataSerializableFactory createFactory() { } private static class Factory implements DataSerializableFactory { - @SuppressWarnings("checkstyle:returncount") @Override public IdentifiedDataSerializable create(int typeId) { switch (typeId) { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java index 2a77a49729fd..0514d83c86d8 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java @@ -21,21 +21,30 @@ import org.apache.seatunnel.api.common.metrics.Meter; import 
org.apache.seatunnel.api.common.metrics.MetricsContext; import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import org.apache.seatunnel.api.table.type.Record; import org.apache.seatunnel.engine.server.task.flow.OneInputFlowLifeCycle; +import lombok.extern.slf4j.Slf4j; + import java.io.IOException; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; import static org.apache.seatunnel.api.common.metrics.MetricNames.SOURCE_RECEIVED_COUNT; import static org.apache.seatunnel.api.common.metrics.MetricNames.SOURCE_RECEIVED_QPS; +@Slf4j public class SeaTunnelSourceCollector implements Collector { private final Object checkpointLock; private final List>> outputs; + private final AtomicBoolean schemaChangeBeforeCheckpointSignal = new AtomicBoolean(false); + + private final AtomicBoolean schemaChangeAfterCheckpointSignal = new AtomicBoolean(false); + private final Counter sourceReceivedCount; private final Meter sourceReceivedQPS; @@ -64,6 +73,53 @@ public void collect(T row) { } } + @Override + public void collect(SchemaChangeEvent event) { + try { + sendRecordToNext(new Record<>(event)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void markSchemaChangeBeforeCheckpoint() { + if (schemaChangeAfterCheckpointSignal.get()) { + throw new IllegalStateException("schema-change-after checkpoint already marked."); + } + if (!schemaChangeBeforeCheckpointSignal.compareAndSet(false, true)) { + throw new IllegalStateException("schema-change-before checkpoint already marked."); + } + log.info("mark schema-change-before checkpoint signal."); + } + + @Override + public void markSchemaChangeAfterCheckpoint() { + if (schemaChangeBeforeCheckpointSignal.get()) { + throw new IllegalStateException("schema-change-before checkpoint already marked."); + } + if (!schemaChangeAfterCheckpointSignal.compareAndSet(false, true)) { + throw new 
IllegalStateException("schema-change-after checkpoint already marked."); + } + log.info("mark schema-change-after checkpoint signal."); + } + + public boolean captureSchemaChangeBeforeCheckpointSignal() { + if (schemaChangeBeforeCheckpointSignal.get()) { + log.info("capture schema-change-before checkpoint signal."); + return schemaChangeBeforeCheckpointSignal.getAndSet(false); + } + return false; + } + + public boolean captureSchemaChangeAfterCheckpointSignal() { + if (schemaChangeAfterCheckpointSignal.get()) { + log.info("capture schema-change-after checkpoint signal."); + return schemaChangeAfterCheckpointSignal.getAndSet(false); + } + return false; + } + @Override public Object getCheckpointLock() { return checkpointLock; diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelTask.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelTask.java index c752d45d6997..aaae5355982c 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelTask.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelTask.java @@ -33,6 +33,8 @@ import org.apache.seatunnel.engine.server.checkpoint.ActionSubtaskState; import org.apache.seatunnel.engine.server.checkpoint.CheckpointBarrier; import org.apache.seatunnel.engine.server.checkpoint.operation.TaskAcknowledgeOperation; +import org.apache.seatunnel.engine.server.checkpoint.operation.TriggerSchemaChangeAfterCheckpointOperation; +import org.apache.seatunnel.engine.server.checkpoint.operation.TriggerSchemaChangeBeforeCheckpointOperation; import org.apache.seatunnel.engine.server.dag.physical.config.IntermediateQueueConfig; import org.apache.seatunnel.engine.server.dag.physical.config.SinkConfig; import org.apache.seatunnel.engine.server.dag.physical.config.SourceConfig; @@ -59,6 +61,7 @@ import 
com.hazelcast.core.HazelcastInstance; import com.hazelcast.internal.metrics.MetricDescriptor; import com.hazelcast.internal.metrics.MetricsCollectionContext; +import com.hazelcast.spi.impl.operationservice.impl.InvocationFuture; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; @@ -133,7 +136,6 @@ public void init() throws Exception { .whenComplete((s, e) -> closeCalled = true); } - @SuppressWarnings("checkstyle:MagicNumber") protected void stateProcess() throws Exception { switch (currState) { case INIT: @@ -347,6 +349,24 @@ public void ack(Barrier barrier) { } } + public InvocationFuture triggerSchemaChangeBeforeCheckpoint() { + log.info( + "trigger schema-change-before checkpoint. jobID[{}], taskLocation[{}]", + jobID, + taskLocation); + return this.getExecutionContext() + .sendToMaster(new TriggerSchemaChangeBeforeCheckpointOperation(taskLocation)); + } + + public InvocationFuture triggerSchemaChangeAfterCheckpoint() { + log.info( + "trigger schema-change-after checkpoint. jobID[{}], taskLocation[{}]", + jobID, + taskLocation); + return this.getExecutionContext() + .sendToMaster(new TriggerSchemaChangeAfterCheckpointOperation(taskLocation)); + } + public void addState(Barrier barrier, ActionStateKey stateKey, List state) { List states = checkpointStates.computeIfAbsent(barrier.getId(), id -> new ArrayList<>()); @@ -365,6 +385,12 @@ public void notifyCheckpointAborted(long checkpointId) throws Exception { tryClose(checkpointId); } + @Override + public void notifyCheckpointEnd(long checkpointId) throws Exception { + notifyAllAction(listener -> listener.notifyCheckpointEnd(checkpointId)); + tryClose(checkpointId); + } + public void notifyAllAction(ConsumerWithException consumer) { allCycles.stream() .filter(cycle -> cycle instanceof InternalCheckpointListener) diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SinkAggregatedCommitterTask.java 
b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SinkAggregatedCommitterTask.java index a83f4bfb1dec..014e5b4cd145 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SinkAggregatedCommitterTask.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SinkAggregatedCommitterTask.java @@ -34,6 +34,7 @@ import org.apache.commons.collections4.CollectionUtils; import com.hazelcast.cluster.Address; +import lombok.Getter; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; @@ -45,6 +46,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; @@ -75,6 +77,8 @@ public class SinkAggregatedCommitterTask private final SinkAggregatedCommitter aggregatedCommitter; private transient Serializer aggregatedCommitInfoSerializer; + @Getter private transient Serializer commitInfoSerializer; + private Map writerAddressMap; private ConcurrentMap> commitInfoCache; @@ -107,6 +111,7 @@ public void init() throws Exception { this.writerAddressMap = new ConcurrentHashMap<>(); this.checkpointCommitInfoMap = new ConcurrentHashMap<>(); this.completableFuture = new CompletableFuture<>(); + this.commitInfoSerializer = sink.getSink().getCommitInfoSerializer().get(); this.aggregatedCommitInfoSerializer = sink.getSink().getAggregatedCommitInfoSerializer().get(); log.debug( @@ -127,7 +132,6 @@ public ProgressState call() throws Exception { return progress.toState(); } - @SuppressWarnings("checkstyle:MagicNumber") protected void stateProcess() throws Exception { switch (currState) { case INIT: @@ -193,6 +197,8 @@ public void close() throws IOException { @Override public void triggerBarrier(Barrier barrier) throws Exception { + long startTime = System.currentTimeMillis(); + 
log.debug("trigger barrier for sink agg commit [{}]", barrier); Integer count = checkpointBarrierCounter.compute( @@ -233,6 +239,12 @@ public void triggerBarrier(Barrier barrier) throws Exception { ActionStateKey.of(sink), -1, states)))) .join(); } + + log.debug( + "trigger barrier [{}] finished, cost {}ms. taskLocation [{}]", + barrier.getId(), + System.currentTimeMillis() - startTime, + taskLocation); } @Override @@ -242,6 +254,7 @@ public void restoreState(List actionStateList) throws Except actionStateList.stream() .map(ActionSubtaskState::getState) .flatMap(Collection::stream) + .filter(Objects::nonNull) .map( bytes -> sneaky( diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSeaTunnelTask.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSeaTunnelTask.java index 842cf8a6022d..8650dc7f2a68 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSeaTunnelTask.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSeaTunnelTask.java @@ -18,10 +18,11 @@ package org.apache.seatunnel.engine.server.task; import org.apache.seatunnel.api.common.metrics.MetricsContext; +import org.apache.seatunnel.api.serialization.Serializer; import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.engine.core.dag.actions.SourceAction; import org.apache.seatunnel.engine.server.dag.physical.config.SourceConfig; -import org.apache.seatunnel.engine.server.dag.physical.flow.Flow; +import org.apache.seatunnel.engine.server.dag.physical.flow.PhysicalExecutionFlow; import org.apache.seatunnel.engine.server.execution.ProgressState; import org.apache.seatunnel.engine.server.execution.TaskLocation; import org.apache.seatunnel.engine.server.task.flow.SourceFlowLifeCycle; @@ -29,6 +30,7 @@ import com.hazelcast.logging.ILogger; import 
com.hazelcast.logging.Logger; +import lombok.Getter; import lombok.NonNull; import java.util.List; @@ -41,15 +43,24 @@ public class SourceSeaTunnelTask extends SeaTunne private transient SeaTunnelSourceCollector collector; private transient Object checkpointLock; + @Getter private transient Serializer splitSerializer; + private final PhysicalExecutionFlow sourceFlow; - public SourceSeaTunnelTask(long jobID, TaskLocation taskID, int indexID, Flow executionFlow) { + public SourceSeaTunnelTask( + long jobID, + TaskLocation taskID, + int indexID, + PhysicalExecutionFlow executionFlow) { super(jobID, taskID, indexID, executionFlow); + this.sourceFlow = executionFlow; } @Override public void init() throws Exception { super.init(); this.checkpointLock = new Object(); + this.splitSerializer = sourceFlow.getAction().getSource().getSplitSerializer(); + LOGGER.info("starting seatunnel source task, index " + indexID); if (!(startFlowLifeCycle instanceof SourceFlowLifeCycle)) { throw new TaskRuntimeException( diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java index 56ba10c48709..e2fe0c335a4e 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java @@ -77,6 +77,7 @@ public class SourceSplitEnumeratorTask extends Coord private SeaTunnelSplitEnumeratorContext enumeratorContext; private Serializer enumeratorStateSerializer; + private Serializer splitSerializer; private int maxReaderSize; private Set unfinishedReaders; @@ -102,6 +103,7 @@ public void init() throws Exception { new SeaTunnelSplitEnumeratorContext<>( this.source.getParallelism(), this, 
getMetricsContext()); enumeratorStateSerializer = this.source.getSource().getEnumeratorStateSerializer(); + splitSerializer = this.source.getSource().getSplitSerializer(); taskMemberMapping = new ConcurrentHashMap<>(); taskIDToTaskLocationMapping = new ConcurrentHashMap<>(); taskIndexToTaskLocationMapping = new ConcurrentHashMap<>(); @@ -134,6 +136,8 @@ public ProgressState call() throws Exception { @Override public void triggerBarrier(Barrier barrier) throws Exception { + long startTime = System.currentTimeMillis(); + log.debug("split enumer trigger barrier [{}]", barrier); if (barrier.prepareClose()) { this.prepareCloseTriggered = true; @@ -164,6 +168,12 @@ public void triggerBarrier(Barrier barrier) throws Exception { Collections.singletonList(serialize))))) .join(); } + + log.debug( + "trigger barrier [{}] finished, cost {}ms. taskLocation [{}]", + barrier.getId(), + System.currentTimeMillis() - startTime, + taskLocation); } @Override @@ -186,12 +196,18 @@ public void restoreState(List actionStateList) throws Except log.debug("restoreState split enumerator [{}] finished", actionStateList); } + public Serializer getSplitSerializer() throws ExecutionException, InterruptedException { + // Because the splitSerializer is initialized in the init method, it's necessary to wait for + // the Enumerator to finish initializing. 
+ getEnumerator(); + return splitSerializer; + } + public void addSplitsBack(List splits, int subtaskId) throws ExecutionException, InterruptedException { getEnumerator().addSplitsBack(splits, subtaskId); } - @SuppressWarnings("checkstyle:MagicNumber") public void receivedReader(TaskLocation readerId, Address memberAddr) throws InterruptedException, ExecutionException { log.info("received reader register, readerID: " + readerId); @@ -243,7 +259,6 @@ public TaskLocation getTaskMemberLocationByIndex(int taskIndex) { return taskIndexToTaskLocationMapping.get(taskIndex); } - @SuppressWarnings("checkstyle:MagicNumber") private SourceSplitEnumerator getEnumerator() throws InterruptedException, ExecutionException { // (restoreComplete == null) means that the Task has not yet executed Init, so we need to @@ -263,7 +278,6 @@ public void readerFinished(long taskID) { } } - @SuppressWarnings("checkstyle:MagicNumber") private void stateProcess() throws Exception { switch (currState) { case INIT: diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/context/SeaTunnelSplitEnumeratorContext.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/context/SeaTunnelSplitEnumeratorContext.java index c3cce03d3bd3..110562e49440 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/context/SeaTunnelSplitEnumeratorContext.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/context/SeaTunnelSplitEnumeratorContext.java @@ -21,7 +21,6 @@ import org.apache.seatunnel.api.source.SourceEvent; import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.source.SourceSplitEnumerator; -import org.apache.seatunnel.common.utils.SerializationUtils; import org.apache.seatunnel.engine.server.task.SourceSplitEnumeratorTask; import 
org.apache.seatunnel.engine.server.task.operation.source.AssignSplitOperation; @@ -31,6 +30,9 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; + +import static org.apache.seatunnel.engine.common.utils.ExceptionUtil.sneaky; @Slf4j public class SeaTunnelSplitEnumeratorContext @@ -67,22 +69,26 @@ public void assignSplit(int subtaskIndex, List splits) { log.warn("No reader is obtained, skip this assign!"); return; } + + List splitBytes = + splits.stream() + .map(split -> sneaky(() -> task.getSplitSerializer().serialize(split))) + .collect(Collectors.toList()); task.getExecutionContext() .sendToMember( new AssignSplitOperation<>( - task.getTaskMemberLocationByIndex(subtaskIndex), - SerializationUtils.serialize(splits.toArray())), + task.getTaskMemberLocationByIndex(subtaskIndex), splitBytes), task.getTaskMemberAddressByIndex(subtaskIndex)) .join(); } @Override public void signalNoMoreSplits(int subtaskIndex) { + List emptySplits = Collections.emptyList(); task.getExecutionContext() .sendToMember( new AssignSplitOperation<>( - task.getTaskMemberLocationByIndex(subtaskIndex), - SerializationUtils.serialize(Collections.emptyList().toArray())), + task.getTaskMemberLocationByIndex(subtaskIndex), emptySplits), task.getTaskMemberAddressByIndex(subtaskIndex)) .join(); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/IntermediateQueueFlowLifeCycle.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/IntermediateQueueFlowLifeCycle.java index 8257997f9706..8765f49b984e 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/IntermediateQueueFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/IntermediateQueueFlowLifeCycle.java @@ -46,7 +46,6 @@ public void received(Record 
record) { queue.received(record); } - @SuppressWarnings("checkstyle:MagicNumber") @Override public void collect(Collector> collector) throws Exception { queue.collect(collector); diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/ShuffleSinkFlowLifeCycle.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/ShuffleSinkFlowLifeCycle.java index 7054c8ac72d0..32ec5cb8f4d0 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/ShuffleSinkFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/ShuffleSinkFlowLifeCycle.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.engine.server.task.flow; +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import org.apache.seatunnel.api.table.type.Record; import org.apache.seatunnel.engine.core.dag.actions.ShuffleAction; import org.apache.seatunnel.engine.core.dag.actions.ShuffleStrategy; @@ -71,6 +72,8 @@ public ShuffleSinkFlowLifeCycle( @Override public void received(Record record) throws IOException { if (record.getData() instanceof Barrier) { + long startTime = System.currentTimeMillis(); + // flush shuffle buffer shuffleFlush(); @@ -93,6 +96,18 @@ public void received(Record record) throws IOException { throw new RuntimeException(e); } } + + log.debug( + "trigger barrier [{}] finished, cost: {}ms. 
taskLocation: [{}]", + barrier.getId(), + System.currentTimeMillis() - startTime, + runningTask.getTaskLocation()); + } else if (record.getData() instanceof SchemaChangeEvent) { + if (prepareClose) { + return; + } + + shuffleItem(record); } else { if (prepareClose) { return; diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/ShuffleSourceFlowLifeCycle.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/ShuffleSourceFlowLifeCycle.java index b32ba1c52439..2f14c6770114 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/ShuffleSourceFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/ShuffleSourceFlowLifeCycle.java @@ -96,6 +96,8 @@ public void collect(Collector> collector) throws Exception { for (int recordIndex = 0; recordIndex < shuffleBatch.size(); recordIndex++) { Record record = shuffleBatch.get(recordIndex); if (record.getData() instanceof Barrier) { + long startTime = System.currentTimeMillis(); + Barrier barrier = (Barrier) record.getData(); // mark queue barrier @@ -117,6 +119,11 @@ public void collect(Collector> collector) throws Exception { runningTask.ack(barrier); collector.collect(record); + log.debug( + "trigger barrier [{}] finished, cost: {}ms. 
taskLocation: [{}]", + barrier.getId(), + System.currentTimeMillis() - startTime, + runningTask.getTaskLocation()); alignedBarriersCounter = 0; alignedBarriers.clear(); diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java index 9e67a601eb9a..c51e3483c09a 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java @@ -23,8 +23,8 @@ import org.apache.seatunnel.api.serialization.Serializer; import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import org.apache.seatunnel.api.table.type.Record; -import org.apache.seatunnel.common.utils.SerializationUtils; import org.apache.seatunnel.engine.core.checkpoint.InternalCheckpointListener; import org.apache.seatunnel.engine.core.dag.actions.SinkAction; import org.apache.seatunnel.engine.server.checkpoint.ActionStateKey; @@ -39,6 +39,7 @@ import org.apache.seatunnel.engine.server.task.record.Barrier; import com.hazelcast.cluster.Address; +import lombok.extern.slf4j.Slf4j; import java.io.IOException; import java.io.Serializable; @@ -46,6 +47,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -56,6 +58,7 @@ import static org.apache.seatunnel.engine.common.utils.ExceptionUtil.sneaky; import static org.apache.seatunnel.engine.server.task.AbstractTask.serializeStates; +@Slf4j public class SinkFlowLifeCycle extends ActionFlowLifeCycle 
implements OneInputFlowLifeCycle>, InternalCheckpointListener { @@ -63,6 +66,7 @@ public class SinkFlowLifeCycle sinkAction; private SinkWriter writer; + private transient Optional> commitInfoSerializer; private transient Optional> writerStateSerializer; private final int indexID; @@ -107,6 +111,7 @@ public SinkFlowLifeCycle( @Override public void init() throws Exception { + this.commitInfoSerializer = sinkAction.getSink().getCommitInfoSerializer(); this.writerStateSerializer = sinkAction.getSink().getWriterStateSerializer(); this.committer = sinkAction.getSink().createCommitter(); this.lastCommitInfo = Optional.empty(); @@ -150,6 +155,8 @@ private void registerCommitter() { public void received(Record record) { try { if (record.getData() instanceof Barrier) { + long startTime = System.currentTimeMillis(); + Barrier barrier = (Barrier) record.getData(); if (barrier.prepareClose()) { prepareClose = true; @@ -179,10 +186,14 @@ public void received(Record record) { runningTask .getExecutionContext() .sendToMember( - new SinkPrepareCommitOperation( + new SinkPrepareCommitOperation( barrier, committerTaskLocation, - SerializationUtils.serialize(commitInfoT)), + commitInfoSerializer.isPresent() + ? commitInfoSerializer + .get() + .serialize(commitInfoT) + : null), committerTaskAddress) .join(); } @@ -197,6 +208,18 @@ public void received(Record record) { } } runningTask.ack(barrier); + + log.debug( + "trigger barrier [{}] finished, cost {}ms. 
taskLocation [{}]", + barrier.getId(), + System.currentTimeMillis() - startTime, + taskLocation); + } else if (record.getData() instanceof SchemaChangeEvent) { + if (prepareClose) { + return; + } + SchemaChangeEvent event = (SchemaChangeEvent) record.getData(); + writer.applySchemaChange(event); } else { if (prepareClose) { return; @@ -230,9 +253,9 @@ public void restoreState(List actionStateList) throws Except if (writerStateSerializer.isPresent()) { states = actionStateList.stream() - .filter(state -> writerStateSerializer.isPresent()) .map(ActionSubtaskState::getState) .flatMap(Collection::stream) + .filter(Objects::nonNull) .map( bytes -> sneaky( diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java index 9ca01eba322d..572836fe5177 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java @@ -23,11 +23,12 @@ import org.apache.seatunnel.api.source.SourceReader; import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.table.type.Record; -import org.apache.seatunnel.common.utils.SerializationUtils; +import org.apache.seatunnel.engine.core.checkpoint.CheckpointType; import org.apache.seatunnel.engine.core.checkpoint.InternalCheckpointListener; import org.apache.seatunnel.engine.core.dag.actions.SourceAction; import org.apache.seatunnel.engine.server.checkpoint.ActionStateKey; import org.apache.seatunnel.engine.server.checkpoint.ActionSubtaskState; +import org.apache.seatunnel.engine.server.checkpoint.CheckpointBarrier; import org.apache.seatunnel.engine.server.execution.TaskLocation; import 
org.apache.seatunnel.engine.server.task.SeaTunnelSourceCollector; import org.apache.seatunnel.engine.server.task.SeaTunnelTask; @@ -41,17 +42,22 @@ import org.apache.seatunnel.engine.server.task.record.Barrier; import com.hazelcast.cluster.Address; +import lombok.AccessLevel; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; import lombok.extern.slf4j.Slf4j; import java.io.IOException; +import java.io.Serializable; import java.util.Collection; import java.util.List; import java.util.Objects; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; -import static org.apache.seatunnel.engine.common.utils.ExceptionUtil.sneaky; import static org.apache.seatunnel.engine.server.task.AbstractTask.serializeStates; @Slf4j @@ -75,6 +81,8 @@ public class SourceFlowLifeCycle extends ActionFl private final MetricsContext metricsContext; + private final AtomicReference schemaChangePhase = new AtomicReference<>(); + public SourceFlowLifeCycle( SourceAction sourceAction, int indexID, @@ -132,12 +140,39 @@ public void close() throws IOException { public void collect() throws Exception { if (!prepareClose) { + if (schemaChanging()) { + log.debug("schema is changing, stop reader collect records"); + + Thread.sleep(200); + return; + } + reader.pollNext(collector); if (collector.isEmptyThisPollNext()) { Thread.sleep(100); } else { collector.resetEmptyThisPollNext(); } + + if (collector.captureSchemaChangeBeforeCheckpointSignal()) { + if (schemaChangePhase.get() != null) { + throw new IllegalStateException( + "previous schema changes in progress, schemaChangePhase: " + + schemaChangePhase.get()); + } + runningTask.triggerSchemaChangeBeforeCheckpoint().get(); + schemaChangePhase.set(SchemaChangePhase.createBeforePhase()); + log.info("triggered schema-change-before checkpoint, stopping collect data"); + } else if 
(collector.captureSchemaChangeAfterCheckpointSignal()) { + if (schemaChangePhase.get() != null) { + throw new IllegalStateException( + "previous schema changes in progress, schemaChangePhase: " + + schemaChangePhase.get()); + } + runningTask.triggerSchemaChangeAfterCheckpoint().get(); + schemaChangePhase.set(SchemaChangePhase.createAfterPhase()); + log.info("triggered schema-change-after checkpoint, stopping collect data"); + } } else { Thread.sleep(100); } @@ -214,6 +249,9 @@ public void receivedSplits(List splits) { public void triggerBarrier(Barrier barrier) throws Exception { log.debug("source trigger barrier [{}]", barrier); + + long startTime = System.currentTimeMillis(); + // Block the reader from adding barrier to the collector. synchronized (collector.getCheckpointLock()) { if (barrier.prepareClose()) { @@ -230,6 +268,38 @@ public void triggerBarrier(Barrier barrier) throws Exception { collector.sendRecordToNext(new Record<>(barrier)); log.debug("send record to next finished, taskId: [{}]", runningTask.getTaskID()); } + + log.debug( + "trigger barrier [{}] finished, cost: {}ms. 
taskLocation: [{}]", + barrier.getId(), + System.currentTimeMillis() - startTime, + currentTaskLocation); + + CheckpointType checkpointType = ((CheckpointBarrier) barrier).getCheckpointType(); + if (schemaChanging() && checkpointType.isSchemaChangeCheckpoint()) { + if (checkpointType.isSchemaChangeBeforeCheckpoint() + && schemaChangePhase.get().isBeforePhase()) { + schemaChangePhase.get().setCheckpointId(barrier.getId()); + } else if (checkpointType.isSchemaChangeAfterCheckpoint() + && schemaChangePhase.get().isAfterPhase()) { + schemaChangePhase.get().setCheckpointId(barrier.getId()); + } else { + throw new IllegalStateException( + String.format( + "schema-change checkpoint[%s,%s] and phase[%s] is not matched", + barrier.getId(), + checkpointType, + schemaChangePhase.get().getPhase())); + } + log.info( + "lock checkpoint[{}] waiting for complete..., phase: [{}]", + barrier.getId(), + schemaChangePhase.get().getPhase()); + } + } + + private boolean schemaChanging() { + return schemaChangePhase.get() != null; } @Override @@ -240,6 +310,25 @@ public void notifyCheckpointComplete(long checkpointId) throws Exception { @Override public void notifyCheckpointAborted(long checkpointId) throws Exception { reader.notifyCheckpointAborted(checkpointId); + if (schemaChangePhase.get() != null + && schemaChangePhase.get().getCheckpointId() == checkpointId) { + throw new IllegalStateException( + String.format( + "schema-change checkpoint[%s] is aborted, phase: [%s]", + checkpointId, schemaChangePhase.get().getPhase())); + } + } + + @Override + public void notifyCheckpointEnd(long checkpointId) throws Exception { + if (schemaChangePhase.get() != null + && schemaChangePhase.get().getCheckpointId() == checkpointId) { + log.info( + "notify schema-change checkpoint[{}] end, phase: [{}]", + checkpointId, + schemaChangePhase.get().getPhase()); + schemaChangePhase.set(null); + } } @Override @@ -247,21 +336,17 @@ public void restoreState(List actionStateList) throws Except if 
(actionStateList.isEmpty()) { return; } - List splits = + List splits = actionStateList.stream() .map(ActionSubtaskState::getState) .flatMap(Collection::stream) .filter(Objects::nonNull) - .map(bytes -> sneaky(() -> splitSerializer.deserialize(bytes))) .collect(Collectors.toList()); try { runningTask .getExecutionContext() .sendToMember( - new RestoredSplitOperation( - enumeratorTaskLocation, - SerializationUtils.serialize(splits.toArray()), - indexID), + new RestoredSplitOperation(enumeratorTaskLocation, splits, indexID), enumeratorTaskAddress) .get(); } catch (InterruptedException | ExecutionException e) { @@ -269,4 +354,38 @@ public void restoreState(List actionStateList) throws Except throw new RuntimeException(e); } } + + @Getter + @ToString + @RequiredArgsConstructor(access = AccessLevel.PRIVATE) + private static class SchemaChangePhase implements Serializable { + private static final String PHASE_CHANGE_BEFORE = "SCHEMA-CHANGE-BEFORE"; + private static final String PHASE_CHANGE_AFTER = "SCHEMA-CHANGE-AFTER"; + + private final String phase; + private volatile long checkpointId = -1; + + public static SchemaChangePhase createBeforePhase() { + return new SchemaChangePhase(PHASE_CHANGE_BEFORE); + } + + public static SchemaChangePhase createAfterPhase() { + return new SchemaChangePhase(PHASE_CHANGE_AFTER); + } + + public boolean isBeforePhase() { + return PHASE_CHANGE_BEFORE.equals(phase); + } + + public boolean isAfterPhase() { + return PHASE_CHANGE_AFTER.equals(phase); + } + + public void setCheckpointId(long checkpointId) { + if (this.checkpointId != -1) { + throw new IllegalStateException("checkpointId is already set"); + } + this.checkpointId = checkpointId; + } + } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/IntermediateBlockingQueue.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/IntermediateBlockingQueue.java 
index 5b2de4c50c88..f7cd7b0c2346 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/IntermediateBlockingQueue.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/IntermediateBlockingQueue.java @@ -42,7 +42,6 @@ public void received(Record record) { } } - @SuppressWarnings("checkstyle:MagicNumber") @Override public void collect(Collector> collector) throws Exception { while (true) { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/IntermediateDisruptor.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/IntermediateDisruptor.java index a452995881e9..e2f3ed69d775 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/IntermediateDisruptor.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/IntermediateDisruptor.java @@ -44,7 +44,6 @@ public void received(Record record) { getIntermediateQueueFlowLifeCycle()); } - @SuppressWarnings("checkstyle:MagicNumber") @Override public void collect(Collector> collector) throws Exception { if (!isExecuted) { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/disruptor/RecordEventProducer.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/disruptor/RecordEventProducer.java index 094ae9acf887..021bb8d2f083 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/disruptor/RecordEventProducer.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/group/queue/disruptor/RecordEventProducer.java @@ -26,7 
+26,6 @@ public class RecordEventProducer { - @SuppressWarnings("checkstyle:MagicNumber") public static void onData( Record record, RingBuffer ringBuffer, diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/GetMetricsOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/GetMetricsOperation.java index 15003a641ddc..8d9c5d7f987d 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/GetMetricsOperation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/GetMetricsOperation.java @@ -31,17 +31,22 @@ import com.hazelcast.spi.impl.operationservice.Operation; import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; import java.util.function.Predicate; -public class GetMetricsOperation extends Operation implements IdentifiedDataSerializable { +import static org.apache.seatunnel.api.common.metrics.MetricTags.JOB_ID; - private Predicate metricDescriptorPredicate; +public class GetMetricsOperation extends Operation implements IdentifiedDataSerializable { private RawJobMetrics response; + private Set runningJobIds; public GetMetricsOperation() {} - public GetMetricsOperation(Predicate metricDescriptorPredicate) { - this.metricDescriptorPredicate = metricDescriptorPredicate; + public GetMetricsOperation(Set runningJobIds) { + this.runningJobIds = runningJobIds; } @Override @@ -60,6 +65,10 @@ public void run() { + " because it is not master. 
Master is: " + masterAddress); } + Predicate metricDescriptorPredicate = + dis -> + (dis.tagValue(JOB_ID) != null + && runningJobIds.contains(Long.parseLong(dis.tagValue(JOB_ID)))); ZetaMetricsCollector metricsRenderer = new ZetaMetricsCollector( @@ -71,13 +80,15 @@ public void run() { @Override protected void writeInternal(ObjectDataOutput out) throws IOException { super.writeInternal(out); - out.writeObject(metricDescriptorPredicate); + out.writeLongArray(runningJobIds.stream().mapToLong(Long::longValue).toArray()); } @Override protected void readInternal(ObjectDataInput in) throws IOException { super.readInternal(in); - this.metricDescriptorPredicate = in.readObject(); + this.runningJobIds = + Arrays.stream(Objects.requireNonNull(in.readLongArray())) + .collect(HashSet::new, HashSet::add, HashSet::addAll); } @Override diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/sink/SinkPrepareCommitOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/sink/SinkPrepareCommitOperation.java index 06945a61b254..5ed6f81a7aa2 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/sink/SinkPrepareCommitOperation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/sink/SinkPrepareCommitOperation.java @@ -17,7 +17,6 @@ package org.apache.seatunnel.engine.server.task.operation.sink; -import org.apache.seatunnel.common.utils.SerializationUtils; import org.apache.seatunnel.engine.server.SeaTunnelServer; import org.apache.seatunnel.engine.server.TaskExecutionService; import org.apache.seatunnel.engine.server.execution.TaskLocation; @@ -33,7 +32,7 @@ import java.io.IOException; @NoArgsConstructor -public class SinkPrepareCommitOperation extends BarrierFlowOperation { +public class SinkPrepareCommitOperation extends 
BarrierFlowOperation { private byte[] commitInfos; public SinkPrepareCommitOperation( @@ -73,15 +72,24 @@ public int getClassId() { public void run() throws Exception { TaskExecutionService taskExecutionService = ((SeaTunnelServer) getService()).getTaskExecutionService(); - SinkAggregatedCommitterTask committerTask = + SinkAggregatedCommitterTask committerTask = taskExecutionService.getTask(taskLocation); - ClassLoader classLoader = + ClassLoader taskClassLoader = taskExecutionService .getExecutionContext(taskLocation.getTaskGroupLocation()) .getClassLoader(); + ClassLoader mainClassLoader = Thread.currentThread().getContextClassLoader(); + if (commitInfos != null) { - committerTask.receivedWriterCommitInfo( - barrier.getId(), SerializationUtils.deserialize(commitInfos, classLoader)); + CommitInfoT deserializeCommitInfo = null; + try { + Thread.currentThread().setContextClassLoader(taskClassLoader); + deserializeCommitInfo = + committerTask.getCommitInfoSerializer().deserialize(commitInfos); + } finally { + Thread.currentThread().setContextClassLoader(mainClassLoader); + } + committerTask.receivedWriterCommitInfo(barrier.getId(), deserializeCommitInfo); } committerTask.triggerBarrier(barrier); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/source/AssignSplitOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/source/AssignSplitOperation.java index 637a48e8ab43..b21111e18fd4 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/source/AssignSplitOperation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/source/AssignSplitOperation.java @@ -19,7 +19,6 @@ import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.common.utils.RetryUtils; -import 
org.apache.seatunnel.common.utils.SerializationUtils; import org.apache.seatunnel.engine.common.Constant; import org.apache.seatunnel.engine.server.SeaTunnelServer; import org.apache.seatunnel.engine.server.exception.TaskGroupContextNotFoundException; @@ -33,18 +32,18 @@ import com.hazelcast.spi.impl.operationservice.Operation; import java.io.IOException; -import java.util.Arrays; -import java.util.stream.Collectors; +import java.util.ArrayList; +import java.util.List; public class AssignSplitOperation extends Operation implements IdentifiedDataSerializable { - private byte[] splits; + private List splits; private TaskLocation taskID; public AssignSplitOperation() {} - public AssignSplitOperation(TaskLocation taskID, byte[] splits) { + public AssignSplitOperation(TaskLocation taskID, List splits) { this.taskID = taskID; this.splits = splits; } @@ -56,13 +55,22 @@ public void run() throws Exception { () -> { SourceSeaTunnelTask task = server.getTaskExecutionService().getTask(taskID); - ClassLoader classLoader = + ClassLoader taskClassLoader = server.getTaskExecutionService() .getExecutionContext(taskID.getTaskGroupLocation()) .getClassLoader(); - Object[] o = SerializationUtils.deserialize(splits, classLoader); - task.receivedSourceSplit( - Arrays.stream(o).map(i -> (SplitT) i).collect(Collectors.toList())); + ClassLoader mainClassLoader = Thread.currentThread().getContextClassLoader(); + List deserializeSplits = new ArrayList<>(); + try { + Thread.currentThread().setContextClassLoader(taskClassLoader); + for (byte[] split : this.splits) { + deserializeSplits.add(task.getSplitSerializer().deserialize(split)); + } + } finally { + Thread.currentThread().setContextClassLoader(mainClassLoader); + } + + task.receivedSourceSplit(deserializeSplits); return null; }, new RetryUtils.RetryMaterial( @@ -76,13 +84,20 @@ public void run() throws Exception { @Override protected void writeInternal(ObjectDataOutput out) throws IOException { - out.writeByteArray(splits); + 
out.writeInt(splits.size()); + for (byte[] split : splits) { + out.writeByteArray(split); + } out.writeObject(taskID); } @Override protected void readInternal(ObjectDataInput in) throws IOException { - splits = in.readByteArray(); + int splitCount = in.readInt(); + splits = new ArrayList<>(splitCount); + for (int i = 0; i < splitCount; i++) { + splits.add(in.readByteArray()); + } taskID = in.readObject(); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/source/RestoredSplitOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/source/RestoredSplitOperation.java index 0c9c3d95c902..05fbf6537e00 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/source/RestoredSplitOperation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/source/RestoredSplitOperation.java @@ -19,7 +19,6 @@ import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.common.utils.RetryUtils; -import org.apache.seatunnel.common.utils.SerializationUtils; import org.apache.seatunnel.engine.common.Constant; import org.apache.seatunnel.engine.server.SeaTunnelServer; import org.apache.seatunnel.engine.server.TaskExecutionService; @@ -34,19 +33,18 @@ import com.hazelcast.nio.ObjectDataOutput; import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; public class RestoredSplitOperation extends TaskOperation { - private byte[] splits; + private List splits; private Integer subtaskIndex; public RestoredSplitOperation() {} public RestoredSplitOperation( - TaskLocation enumeratorLocation, byte[] splits, int subtaskIndex) { + TaskLocation enumeratorLocation, List splits, int subtaskIndex) { super(enumeratorLocation); this.splits = splits; 
this.subtaskIndex = subtaskIndex; @@ -55,14 +53,21 @@ public RestoredSplitOperation( @Override protected void writeInternal(ObjectDataOutput out) throws IOException { super.writeInternal(out); - out.writeByteArray(splits); + out.writeInt(splits.size()); + for (byte[] split : splits) { + out.writeByteArray(split); + } out.writeInt(subtaskIndex); } @Override protected void readInternal(ObjectDataInput in) throws IOException { super.readInternal(in); - splits = in.readByteArray(); + int splitCount = in.readInt(); + splits = new ArrayList<>(splitCount); + for (int i = 0; i < splitCount; i++) { + splits.add(in.readByteArray()); + } subtaskIndex = in.readInt(); } @@ -82,27 +87,31 @@ public void run() throws Exception { TaskExecutionService taskExecutionService = server.getTaskExecutionService(); RetryUtils.retryWithException( () -> { - ClassLoader classLoader = + SourceSplitEnumeratorTask task = + taskExecutionService.getTask(taskLocation); + ClassLoader taskClassLoader = taskExecutionService .getExecutionContext(taskLocation.getTaskGroupLocation()) .getClassLoader(); + ClassLoader mainClassLoader = Thread.currentThread().getContextClassLoader(); + + List deserializeSplits = new ArrayList<>(); + try { + Thread.currentThread().setContextClassLoader(taskClassLoader); + for (byte[] split : splits) { + deserializeSplits.add(task.getSplitSerializer().deserialize(split)); + } + } finally { + Thread.currentThread().setContextClassLoader(mainClassLoader); + } - List deserialize = - Arrays.stream( - (Object[]) - SerializationUtils.deserialize( - splits, classLoader)) - .map(o -> (SourceSplit) o) - .collect(Collectors.toList()); - SourceSplitEnumeratorTask task = - taskExecutionService.getTask(taskLocation); task.getExecutionContext() .getTaskExecutionService() .asyncExecuteFunction( taskLocation.getTaskGroupLocation(), () -> { try { - task.addSplitsBack(deserialize, subtaskIndex); + task.addSplitsBack(deserializeSplits, subtaskIndex); } catch (Exception e) { 
task.getExecutionContext() .sendToMaster( diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/utils/RestUtil.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/utils/RestUtil.java new file mode 100644 index 000000000000..d3761366d095 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/utils/RestUtil.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.engine.server.utils; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import org.apache.seatunnel.common.Constants; +import org.apache.seatunnel.common.utils.JsonUtils; +import org.apache.seatunnel.core.starter.utils.ConfigBuilder; + +import com.hazelcast.internal.util.StringUtil; + +import java.io.IOException; +import java.util.Map; + +public class RestUtil { + private RestUtil() {} + + private static final ObjectMapper objectMapper = new ObjectMapper(); + + public static JsonNode convertByteToJsonNode(byte[] byteData) throws IOException { + return objectMapper.readTree(byteData); + } + + public static void buildRequestParams(Map requestParams, String uri) { + requestParams.put("jobId", null); + requestParams.put("jobName", Constants.LOGO); + requestParams.put("isStartWithSavePoint", String.valueOf(false)); + uri = StringUtil.stripTrailingSlash(uri); + if (!uri.contains("?")) { + return; + } + int indexEnd = uri.indexOf('?'); + try { + for (String s : uri.substring(indexEnd + 1).split("&")) { + String[] param = s.split("="); + requestParams.put(param[0], param[1]); + } + } catch (IndexOutOfBoundsException e) { + throw new IllegalArgumentException("Invalid Params format in Params."); + } + } + + public static Config buildConfig(JsonNode jsonNode) { + Map objectMap = JsonUtils.toMap(jsonNode); + return ConfigBuilder.of(objectMap); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/CoordinatorServiceTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/CoordinatorServiceTest.java index 3cb224e69e05..320ce05e3e86 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/CoordinatorServiceTest.java +++ 
b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/CoordinatorServiceTest.java @@ -82,7 +82,6 @@ public void testMasterNodeActive() { instance2.shutdown(); } - @SuppressWarnings("checkstyle:RegexpSingleline") @Test public void testClearCoordinatorService() throws MalformedURLException, NoSuchMethodException, InvocationTargetException, diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/TestUtils.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/TestUtils.java index 0481256ce461..571a3c95db47 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/TestUtils.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/TestUtils.java @@ -52,7 +52,6 @@ public static String getResource(String confFile) { return System.getProperty("user.dir") + "/src/test/resources/" + confFile; } - @SuppressWarnings("checkstyle:MagicNumber") public static LogicalDag getTestLogicalDag(JobContext jobContext) throws MalformedURLException { IdGenerator idGenerator = new IdGenerator(); FakeSource fakeSource = new FakeSource(); diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java new file mode 100644 index 000000000000..bb9c0149025a --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.engine.server.checkpoint; + +import org.apache.seatunnel.engine.common.utils.PassiveCompletableFuture; +import org.apache.seatunnel.engine.core.dag.logical.LogicalDag; +import org.apache.seatunnel.engine.core.job.JobImmutableInformation; +import org.apache.seatunnel.engine.core.job.JobStatus; +import org.apache.seatunnel.engine.server.AbstractSeaTunnelServerTest; +import org.apache.seatunnel.engine.server.TestUtils; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import com.hazelcast.internal.serialization.Data; +import lombok.extern.slf4j.Slf4j; + +import java.util.Collections; +import java.util.concurrent.TimeUnit; + +import static org.awaitility.Awaitility.await; + +@Slf4j +public class CheckpointTimeOutTest extends AbstractSeaTunnelServerTest { + + public static String CONF_PATH = "stream_fake_to_console_checkpointTimeOut.conf"; + public static long JOB_ID = System.currentTimeMillis(); + + @Test + @Disabled("Currently unstable tests, waiting for @EricJoy2048 to refactor state handling logic") + public void testJobLevelCheckpointTimeOut() { + startJob(JOB_ID, CONF_PATH); + + await().atMost(120000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + server.getCoordinatorService().getJobStatus(JOB_ID), + JobStatus.RUNNING)); + + await().atMost(360000, 
TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + log.info( + "Job status: {}", + server.getCoordinatorService().getJobStatus(JOB_ID)); + Assertions.assertEquals( + server.getCoordinatorService().getJobStatus(JOB_ID), + JobStatus.FAILED); + }); + } + + private void startJob(Long jobid, String path) { + LogicalDag testLogicalDag = TestUtils.createTestLogicalPlan(path, jobid.toString(), jobid); + + JobImmutableInformation jobImmutableInformation = + new JobImmutableInformation( + jobid, + "Test", + false, + nodeEngine.getSerializationService().toData(testLogicalDag), + testLogicalDag.getJobConfig(), + Collections.emptyList()); + + Data data = nodeEngine.getSerializationService().toData(jobImmutableInformation); + + PassiveCompletableFuture voidPassiveCompletableFuture = + server.getCoordinatorService().submitJob(jobid, data); + voidPassiveCompletableFuture.join(); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/SavePointTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/SavePointTest.java index 804e5c455e5d..fdf02c7513b7 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/SavePointTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/SavePointTest.java @@ -45,7 +45,6 @@ public class SavePointTest extends AbstractSeaTunnelServerTest { public static long JOB_ID = 823342L; @Test - @Disabled() public void testSavePoint() throws InterruptedException { savePointAndRestore(false); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobMetricsTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobMetricsTest.java index 5b0a75abf58b..896037edcf69 100644 --- 
a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobMetricsTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobMetricsTest.java @@ -96,7 +96,6 @@ public void testGetJobMetrics() throws Exception { } @Test - @SuppressWarnings("checkstyle:RegexpSingleline") public void testMetricsOnJobRestart() throws InterruptedException { long jobId3 = System.currentTimeMillis() + 323475L; diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/resources/seatunnel.yaml b/seatunnel-engine/seatunnel-engine-server/src/test/resources/seatunnel.yaml index 8f22b0613cad..f8739cc48301 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/resources/seatunnel.yaml +++ b/seatunnel-engine/seatunnel-engine-server/src/test/resources/seatunnel.yaml @@ -25,8 +25,6 @@ seatunnel: checkpoint: interval: 6000 timeout: 7000 - max-concurrent: 1 - tolerable-failure: 2 storage: type: hdfs max-retained: 3 diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/resources/stream_fake_to_console_checkpointTimeOut.conf b/seatunnel-engine/seatunnel-engine-server/src/test/resources/stream_fake_to_console_checkpointTimeOut.conf new file mode 100644 index 000000000000..2d541ac2acd7 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/test/resources/stream_fake_to_console_checkpointTimeOut.conf @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set flink configuration here + execution.parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 1000 + checkpoint.timeout = 100 +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + result_table_name = "fake1" + row.num = 1000 + split.num = 100 + split.read-interval = 3000 + parallelism = 1 + schema = { + fields { + name = "string" + age = "int" + } + } + parallelism = 1 + } +} + +transform { +} + +sink { + console { + log.print.delay.ms=5000 + } +} \ No newline at end of file diff --git a/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/main/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/common/HdfsConfiguration.java b/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/main/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/common/HdfsConfiguration.java index 8d41ae848d86..953da3027bd0 100644 --- a/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/main/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/common/HdfsConfiguration.java +++ b/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/main/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/common/HdfsConfiguration.java @@ -49,6 +49,8 @@ public class HdfsConfiguration extends 
AbstractConfiguration { private static final String HDFS_IMPL_KEY = "fs.hdfs.impl"; + private static final String SEATUNNEL_HADOOP_PREFIX = "seatunnel.hadoop."; + @Override public Configuration buildConfiguration(Map config) throws CheckpointStorageException { @@ -69,7 +71,15 @@ public Configuration buildConfiguration(Map config) authenticateKerberos(kerberosPrincipal, kerberosKeytabFilePath, hadoopConf); } } - // todo support other hdfs optional config keys + // support other hdfs optional config keys + config.entrySet().stream() + .filter(entry -> entry.getKey().startsWith(SEATUNNEL_HADOOP_PREFIX)) + .forEach( + entry -> { + String key = entry.getKey().replace(SEATUNNEL_HADOOP_PREFIX, ""); + String value = entry.getValue(); + hadoopConf.set(key, value); + }); return hadoopConf; } diff --git a/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/test/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/HDFSFileCheckpointTest.java b/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/test/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/HDFSFileCheckpointTest.java new file mode 100644 index 000000000000..23a41a2782ba --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/test/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/HDFSFileCheckpointTest.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.seatunnel.engine.checkpoint.storage.hdfs; + +import org.apache.seatunnel.engine.checkpoint.storage.exception.CheckpointStorageException; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; + +import java.util.HashMap; +import java.util.Map; + +@Disabled( + "HDFS is not available in CI, if you want to run this test, please set up your own HDFS environment") +public class HDFSFileCheckpointTest extends AbstractFileCheckPointTest { + + @BeforeAll + public static void setup() throws CheckpointStorageException { + Map config = new HashMap<>(); + config.put("storage.type", "hdfs"); + config.put("fs.defaultFS", "hdfs://usdp-bing"); + config.put("seatunnel.hadoop.dfs.nameservices", "usdp-bing"); + config.put("seatunnel.hadoop.dfs.ha.namenodes.usdp-bing", "nn1,nn2"); + config.put("seatunnel.hadoop.dfs.namenode.rpc-address.usdp-bing.nn1", "usdp-bing-nn1:8020"); + config.put("seatunnel.hadoop.dfs.namenode.rpc-address.usdp-bing.nn2", "usdp-bing-nn2:8020"); + config.put( + "seatunnel.hadoop.dfs.client.failover.proxy.provider.usdp-bing", + "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"); + STORAGE = new HdfsStorage(config); + initStorageData(); + } +} diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/pom.xml b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/pom.xml index a742fe39a0b9..c7eb61012e68 100644 --- a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/pom.xml 
+++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/pom.xml @@ -78,6 +78,19 @@ hadoop-aliyun ${hadoop-aliyun.version} provided + + + net.minidev + json-smart + + + + + + net.minidev + json-smart + ${json-smart.version} + provided diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/src/main/java/org/apache/seatunnel/engine/imap/storage/file/common/WALDataUtils.java b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/src/main/java/org/apache/seatunnel/engine/imap/storage/file/common/WALDataUtils.java index e475fde53ace..ffa38502d792 100644 --- a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/src/main/java/org/apache/seatunnel/engine/imap/storage/file/common/WALDataUtils.java +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/src/main/java/org/apache/seatunnel/engine/imap/storage/file/common/WALDataUtils.java @@ -20,7 +20,6 @@ package org.apache.seatunnel.engine.imap.storage.file.common; -// CHECKSTYLE:OFF public class WALDataUtils { public static final int WAL_DATA_METADATA_LENGTH = 12; diff --git a/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf index 0f927351fb63..a09137dc033a 100644 --- a/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf +++ b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf @@ -47,7 +47,7 @@ source { transform { # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, - # please go to https://seatunnel.apache.org/docs/category/transform + # please go to https://seatunnel.apache.org/docs/category/transform-v2 } sink { diff --git 
a/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf b/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf index f3ad1b5f73cc..cf0958ecdee6 100644 --- a/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf +++ b/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf @@ -79,7 +79,7 @@ transform { } # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, - # please go to https://seatunnel.apache.org/docs/category/transform + # please go to https://seatunnel.apache.org/docs/category/transform-v2 } sink { diff --git a/seatunnel-formats/pom.xml b/seatunnel-formats/pom.xml index 983a8629ce89..7fc09b356a03 100644 --- a/seatunnel-formats/pom.xml +++ b/seatunnel-formats/pom.xml @@ -30,6 +30,7 @@ seatunnel-format-json seatunnel-format-text seatunnel-format-compatible-debezium-json + seatunnel-format-compatible-connect-json diff --git a/seatunnel-formats/seatunnel-format-compatible-connect-json/pom.xml b/seatunnel-formats/seatunnel-format-compatible-connect-json/pom.xml new file mode 100644 index 000000000000..d3d554574281 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-compatible-connect-json/pom.xml @@ -0,0 +1,62 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-formats + ${revision} + + + seatunnel-format-compatible-connect-json + SeaTunnel : Formats : Compatible Kafka Connect Json + + 1.6.4.Final + + + + + org.apache.seatunnel + seatunnel-api + ${project.version} + provided + + + + org.apache.seatunnel + seatunnel-format-json + ${project.version} + provided + + + + org.apache.kafka + kafka-clients + 3.2.0 + provided + + + + org.apache.kafka + connect-json + 3.2.0 + provided + + + + + diff --git 
a/seatunnel-formats/seatunnel-format-compatible-connect-json/src/main/java/org/apache/seatunnel/format/compatible/kafka/connect/json/CompatibleKafkaConnectDeserializationSchema.java b/seatunnel-formats/seatunnel-format-compatible-connect-json/src/main/java/org/apache/seatunnel/format/compatible/kafka/connect/json/CompatibleKafkaConnectDeserializationSchema.java new file mode 100644 index 000000000000..b2e6ac97e977 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-compatible-connect-json/src/main/java/org/apache/seatunnel/format/compatible/kafka/connect/json/CompatibleKafkaConnectDeserializationSchema.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.format.compatible.kafka.connect.json; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.serialization.DeserializationSchema; +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.table.type.RowKind; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.exception.CommonErrorCode; +import org.apache.seatunnel.common.utils.ReflectionUtils; +import org.apache.seatunnel.format.json.JsonToRowConverters; +import org.apache.seatunnel.format.json.exception.SeaTunnelJsonFormatException; + +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaAndValue; +import org.apache.kafka.connect.json.JsonConverter; +import org.apache.kafka.connect.json.JsonConverterConfig; +import org.apache.kafka.connect.sink.SinkRecord; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Collections; +import java.util.Map; + +import static com.google.common.base.Preconditions.checkNotNull; + +/** Compatible kafka connect deserialization schema */ +@RequiredArgsConstructor +public class CompatibleKafkaConnectDeserializationSchema + implements DeserializationSchema { + + private static final String INCLUDE_SCHEMA_METHOD = "convertToJsonWithEnvelope"; + private static final String 
EXCLUDE_SCHEMA_METHOD = "convertToJsonWithoutEnvelope"; + private static final String KAFKA_CONNECT_SINK_RECORD_PAYLOAD = "payload"; + private transient JsonConverter keyConverter; + private transient JsonConverter valueConverter; + private transient Method keyConverterMethod; + private transient Method valueConverterMethod; + private final SeaTunnelRowType seaTunnelRowType; + private final JsonToRowConverters.JsonToRowConverter runtimeConverter; + private final boolean keySchemaEnable; + private final boolean valueSchemaEnable; + /** Object mapper for parsing the JSON. */ + private final ObjectMapper objectMapper = new ObjectMapper(); + + public CompatibleKafkaConnectDeserializationSchema( + @NonNull SeaTunnelRowType seaTunnelRowType, + @NonNull Config config, + boolean failOnMissingField, + boolean ignoreParseErrors) { + + Map configMap = ReadonlyConfig.fromConfig(config).toMap(); + this.seaTunnelRowType = seaTunnelRowType; + this.keySchemaEnable = + KafkaConnectJsonFormatOptions.getKeyConverterSchemaEnabled(configMap); + this.valueSchemaEnable = + KafkaConnectJsonFormatOptions.getValueConverterSchemaEnabled(configMap); + + // Runtime converter + this.runtimeConverter = + new JsonToRowConverters(failOnMissingField, ignoreParseErrors) + .createConverter(checkNotNull(seaTunnelRowType)); + } + + @Override + public SeaTunnelRow deserialize(byte[] message) throws IOException { + throw new UnsupportedEncodingException(); + } + + /** + * Deserialize kafka consumer record + * + * @param msg + * @param out + * @throws Exception + */ + public void deserialize(ConsumerRecord msg, Collector out) + throws InvocationTargetException, IllegalAccessException { + tryInitConverter(); + SinkRecord record = convertToSinkRecord(msg); + RowKind rowKind = RowKind.INSERT; + JsonNode jsonNode = + (JsonNode) + valueConverterMethod.invoke( + valueConverter, record.valueSchema(), record.value()); + JsonNode payload = jsonNode.get(KAFKA_CONNECT_SINK_RECORD_PAYLOAD); + if (payload.isArray()) { 
+ ArrayNode arrayNode = (ArrayNode) payload; + for (int i = 0; i < arrayNode.size(); i++) { + SeaTunnelRow row = convertJsonNode(arrayNode.get(i)); + row.setRowKind(rowKind); + out.collect(row); + } + } else { + SeaTunnelRow row = convertJsonNode(payload); + row.setRowKind(rowKind); + out.collect(row); + } + } + + private SeaTunnelRow convertJsonNode(JsonNode jsonNode) { + if (jsonNode.isNull()) { + return null; + } + try { + org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode jsonData = + objectMapper.readTree(jsonNode.toString()); + return (SeaTunnelRow) runtimeConverter.convert(jsonData); + } catch (Throwable t) { + throw new SeaTunnelJsonFormatException( + CommonErrorCode.JSON_OPERATION_FAILED, + String.format("Failed to deserialize JSON '%s'.", jsonNode), + t); + } + } + + private SinkRecord convertToSinkRecord(ConsumerRecord msg) { + SchemaAndValue keyAndSchema = + (msg.key() == null) + ? SchemaAndValue.NULL + : keyConverter.toConnectData(msg.topic(), msg.headers(), msg.key()); + SchemaAndValue valueAndSchema = + valueConverter.toConnectData(msg.topic(), msg.headers(), msg.value()); + return new SinkRecord( + msg.topic(), + msg.partition(), + keyAndSchema.schema(), + keyAndSchema.value(), + valueAndSchema.schema(), + valueAndSchema.value(), + msg.offset(), + msg.timestamp(), + msg.timestampType(), + null); + } + + @Override + public SeaTunnelDataType getProducedType() { + return seaTunnelRowType; + } + + private void tryInitConverter() { + if (keyConverter == null) { + synchronized (this) { + if (keyConverter == null) { + keyConverter = new JsonConverter(); + keyConverter.configure( + Collections.singletonMap( + JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, keySchemaEnable), + true); + keyConverterMethod = + ReflectionUtils.getDeclaredMethod( + JsonConverter.class, + keySchemaEnable + ? 
INCLUDE_SCHEMA_METHOD + : EXCLUDE_SCHEMA_METHOD, + Schema.class, + Object.class) + .get(); + } + } + } + if (valueConverter == null) { + synchronized (this) { + if (valueConverter == null) { + valueConverter = new JsonConverter(); + valueConverter.configure( + Collections.singletonMap( + JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, valueSchemaEnable), + false); + valueConverterMethod = + ReflectionUtils.getDeclaredMethod( + JsonConverter.class, + valueSchemaEnable + ? INCLUDE_SCHEMA_METHOD + : EXCLUDE_SCHEMA_METHOD, + Schema.class, + Object.class) + .get(); + } + } + } + } +} diff --git a/seatunnel-formats/seatunnel-format-compatible-connect-json/src/main/java/org/apache/seatunnel/format/compatible/kafka/connect/json/KafkaConnectJsonFormatOptions.java b/seatunnel-formats/seatunnel-format-compatible-connect-json/src/main/java/org/apache/seatunnel/format/compatible/kafka/connect/json/KafkaConnectJsonFormatOptions.java new file mode 100644 index 000000000000..05e16e0abb79 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-compatible-connect-json/src/main/java/org/apache/seatunnel/format/compatible/kafka/connect/json/KafkaConnectJsonFormatOptions.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.format.compatible.kafka.connect.json; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; + +import java.util.Map; + +public class KafkaConnectJsonFormatOptions { + + public static final Option KEY_CONVERTER_SCHEMA_ENABLED = + Options.key("key_converter_schema_enabled") + .booleanType() + .defaultValue(true) + .withDescription("kafka connect key converter schema enabled."); + + public static final Option VALUE_CONVERTER_SCHEMA_ENABLED = + Options.key("value_converter_schema_enabled") + .booleanType() + .defaultValue(true) + .withDescription("kafka connect value converter schema enabled."); + + public static boolean getKeyConverterSchemaEnabled(Map options) { + return Boolean.parseBoolean( + options.getOrDefault(KEY_CONVERTER_SCHEMA_ENABLED.key(), "true")); + } + + public static boolean getValueConverterSchemaEnabled(Map options) { + return Boolean.parseBoolean( + options.getOrDefault(VALUE_CONVERTER_SCHEMA_ENABLED.key(), "true")); + } +} diff --git a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java index 86f25a69b514..1ec0499fb5e9 100644 --- a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java +++ b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java @@ -187,9 +187,13 @@ private Object convert(String field, SeaTunnelDataType fieldType, int level) String[] kvs = field.split(separators[level + 1]); for (String kv : kvs) { String[] splits = kv.split(separators[level + 2]); - objectMap.put( - convert(splits[0], keyType, level + 1), - convert(splits[1], valueType, level + 1)); + if (splits.length < 2) { + objectMap.put(convert(splits[0], keyType, level + 1), 
null); + } else { + objectMap.put( + convert(splits[0], keyType, level + 1), + convert(splits[1], valueType, level + 1)); + } } return objectMap; case STRING: diff --git a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java index 7d904e2c8fcd..57e99d49b69e 100644 --- a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java @@ -45,7 +45,14 @@ public class TextFormatSchemaTest { + '\002' + "Kris" + '\003' - + "21\001" + + "21" + + '\002' + + "nullValueKey" + + '\003' + + '\002' + + '\003' + + "1231" + + "\001" + "tyrantlucifer\001" + "true\001" + "1\001" @@ -66,7 +73,6 @@ public class TextFormatSchemaTest { public SeaTunnelRowType seaTunnelRowType; - @SuppressWarnings("checkstyle:Indentation") @BeforeEach public void initSeaTunnelRowType() { seaTunnelRowType = diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java index 8b4ed6332b36..c638d7f60997 100644 --- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java +++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java @@ -228,7 +228,6 @@ public T createPluginInstance(PluginIdentifier pluginIdentifier, Collection * * @return the all plugin identifier of the engine */ - @SuppressWarnings("checkstyle:WhitespaceAfter") public Map> getAllPlugin() throws IOException { List factories; diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLEngine.java 
b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLEngine.java index b1e734c31ef4..6dfaddca00a9 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLEngine.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLEngine.java @@ -23,7 +23,11 @@ import java.util.List; public interface SQLEngine { - void init(String inputTableName, SeaTunnelRowType inputRowType, String sql); + void init( + String inputTableName, + String catalogTableName, + SeaTunnelRowType inputRowType, + String sql); SeaTunnelRowType typeMapping(List inputColumnsMapping); diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLTransform.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLTransform.java index 20a07dcee02e..9b21c4b6f5c4 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLTransform.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLTransform.java @@ -115,7 +115,11 @@ protected void setConfig(Config pluginConfig) { @Override public void open() { sqlEngine = SQLEngineFactory.getSQLEngine(engineType); - sqlEngine.init(inputTableName, inputRowType, query); + sqlEngine.init( + inputTableName, + inputCatalogTable != null ? 
inputCatalogTable.getTableId().getTableName() : null, + inputRowType, + query); } private void tryOpen() { diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java index 55fbe04cf13c..2f01fe3af98d 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java @@ -37,6 +37,8 @@ import net.sf.jsqlparser.statement.select.SelectExpressionItem; import net.sf.jsqlparser.statement.select.SelectItem; +import javax.annotation.Nullable; + import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -45,6 +47,7 @@ public class ZetaSQLEngine implements SQLEngine { private String inputTableName; + @Nullable private String catalogTableName; private SeaTunnelRowType inputRowType; private String sql; @@ -59,8 +62,13 @@ public class ZetaSQLEngine implements SQLEngine { public ZetaSQLEngine() {} @Override - public void init(String inputTableName, SeaTunnelRowType inputRowType, String sql) { + public void init( + String inputTableName, + String catalogTableName, + SeaTunnelRowType inputRowType, + String sql) { this.inputTableName = inputTableName; + this.catalogTableName = catalogTableName; this.inputRowType = inputRowType; this.sql = sql; @@ -109,7 +117,8 @@ private void validateSQL(Statement statement) { throw new IllegalArgumentException("Unsupported table alias name syntax"); } String tableName = table.getName(); - if (!inputTableName.equalsIgnoreCase(tableName)) { + if (!inputTableName.equalsIgnoreCase(tableName) + && !tableName.equalsIgnoreCase(catalogTableName)) { throw new IllegalArgumentException( String.format("Table name: %s not found", tableName)); } diff --git 
a/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngineTest.java b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngineTest.java new file mode 100644 index 000000000000..94e1060af859 --- /dev/null +++ b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngineTest.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.sql.zeta; + +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.transform.exception.TransformException; +import org.apache.seatunnel.transform.sql.SQLEngine; +import org.apache.seatunnel.transform.sql.SQLEngineFactory; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class ZetaSQLEngineTest { + + @Test + public void testCatalogNameAndSourceTableNameBothSupport() { + + SQLEngine sqlEngine = SQLEngineFactory.getSQLEngine(SQLEngineFactory.EngineType.ZETA); + + SeaTunnelRowType rowType = + new SeaTunnelRowType( + new String[] {"id", "name", "age"}, + new SeaTunnelDataType[] { + BasicType.INT_TYPE, BasicType.STRING_TYPE, BasicType.INT_TYPE + }); + sqlEngine.init("test", null, rowType, "select * from test"); + sqlEngine.init("test", "nameFromCatalog", rowType, "select * from test"); + sqlEngine.init("test", "nameFromCatalog", rowType, "select * from nameFromCatalog"); + + Assertions.assertThrows( + TransformException.class, + () -> sqlEngine.init("test", "nameFromCatalog", rowType, "select * from unknown")); + Assertions.assertThrows( + TransformException.class, + () -> sqlEngine.init("test", null, rowType, "select * from unknown")); + } +} diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/BaseSeaTunnelSourceFunction.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/BaseSeaTunnelSourceFunction.java index f0c6fc2ae7d7..14f83ccd398e 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/BaseSeaTunnelSourceFunction.java +++ 
b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/BaseSeaTunnelSourceFunction.java @@ -80,7 +80,6 @@ public void open(Configuration parameters) throws Exception { protected abstract BaseSourceFunction createInternalSource(); - @SuppressWarnings("checkstyle:MagicNumber") @Override public void run(SourceFunction.SourceContext sourceContext) throws Exception { internalSource.run( diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtils.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtils.java index c904f52b5132..fc8b4f6b3cb8 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtils.java +++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtils.java @@ -43,7 +43,6 @@ import java.util.HashMap; import java.util.Map; -@SuppressWarnings("checkstyle:MagicNumber") public class TypeConverterUtils { private static final Map, BridgedType> BRIDGED_TYPES = new HashMap<>(32); diff --git a/tools/update_modules_check/update_modules_check.py b/tools/update_modules_check/update_modules_check.py index 324a961bc7e2..b009fda25a4a 100644 --- a/tools/update_modules_check/update_modules_check.py +++ b/tools/update_modules_check/update_modules_check.py @@ -172,10 +172,6 @@ def main(argv): get_cv2_modules(argv[2]) elif argv[1] == "cv2-e2e": get_cv2_e2e_modules(argv[2]) - elif argv[1] == "cv2-flink-e2e": - get_cv2_flink_e2e_modules(argv[2]) - elif argv[1] == "cv2-spark-e2e": - get_cv2_spark_e2e_modules(argv[2]) elif argv[1] == "engine": 
get_engine_modules(argv[2]) elif argv[1] == "engine-e2e":