diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 1dc77e89521bfe..6e17dfb246030a 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-27882 +pr-25673 diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile index 5911016b37d008..8b955def2aec00 100644 --- a/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_riscv/Dockerfile @@ -62,10 +62,13 @@ RUN echo deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main restricte RUN dpkg --add-architecture riscv64 && \ apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/riscv64-sources.list && \ - apt-get install -y --no-install-recommends libpython3-dev:riscv64 + apt-get install -y --no-install-recommends libpython3-dev:riscv64 && \ + apt-get install -y --no-install-recommends libgomp1:riscv64 && \ + apt-get install -y --no-install-recommends libatomic1:riscv64 # Setup pip ENV PIP_VERSION="24.0" RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ python3 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ rm -f get-pip.py + diff --git a/.github/scripts/workflow_rerun/errors_to_look_for.json b/.github/scripts/workflow_rerun/errors_to_look_for.json index b9cac8f17adaa6..d8fe6ac2df03d2 100644 --- a/.github/scripts/workflow_rerun/errors_to_look_for.json +++ b/.github/scripts/workflow_rerun/errors_to_look_for.json @@ -86,5 +86,25 @@ { "error_text": "because the GET request got Content-Type", "ticket": 158400 + }, + { + "error_text": "Unable to make request:", + "ticket": 158401 + }, + { + "error_text": "Failed to make request", + "ticket": 158401 + }, + { + "error_text": "Failure when receiving data from the peer", + "ticket": 159323 + }, + { + "error_text": "HTTP response code said error", + "ticket": 159398 + }, + { + "error_text": "download failed after attempts", + "ticket": 159547 } ] \ No newline at end of file diff --git a/.github/workflows/cleanup_caches.yml b/.github/workflows/cleanup_caches.yml index d6633fd9dab3ee..c3aac30ccd4379 100644 --- a/.github/workflows/cleanup_caches.yml +++ b/.github/workflows/cleanup_caches.yml @@ -4,7 +4,7 @@ on: schedule: # at 00:00 on the 1st day of every month - cron: '0 0 1 * *' - + permissions: read-all jobs: @@ -61,8 +61,8 @@ jobs: cache-path: ${{ env.CCACHE_PATH }} recursive: true key: '.'
- - + + Cleanup_ccache_win: name: Cleanup Windows ccache runs-on: 'aks-win-4-cores-8gb' diff --git a/.github/workflows/dev_cpu_linux_riscv.yml b/.github/workflows/dev_cpu_linux_riscv.yml new file mode 100644 index 00000000000000..daeb42149539f8 --- /dev/null +++ b/.github/workflows/dev_cpu_linux_riscv.yml @@ -0,0 +1,269 @@ +name: Linux RISC-V CPU workflow with Xuantie (Ubuntu 22.04, Python 3.10) + +on: + workflow_dispatch: + inputs: + testFilter: + description: 'Filter for google tests' + required: true + default: '*smoke_AdaPoolAvg4DLayoutTest*' + pull_request: + paths: + - '.github/workflows/dev_cpu_linux_riscv.yml' + +env: + CMAKE_GENERATOR: 'Ninja Multi-Config' + XUANTIE_BIN_PATH: /mount/testdata1 + XUANTIE_DIR: /__w/openvino/xuantie + XUANTIE_TAR_NAME: 'Xuantie-900-gcc-linux-5.15.0-glibc-x86_64-V2.8.1' + +concurrency: + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-dev-cpu-linux-riscv + cancel-in-progress: true + +permissions: read-all + +jobs: + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*/layer_tests_summary/*,*/conformance/*' + + Docker: + needs: Smart_CI + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_22_04_riscv + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + + Build: + needs: [Smart_CI, Docker] + timeout-minutes: 150 + defaults: + run: + shell: bash + runs-on: aks-linux-16-cores-32gb + container: + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_riscv }} + volumes: + - /mount:/mount + env: + CMAKE_BUILD_TYPE: 'Release' + CMAKE_CXX_COMPILER_LAUNCHER: ccache + CMAKE_C_COMPILER_LAUNCHER: ccache + GITHUB_WORKSPACE: '/__w/openvino/openvino' + OPENVINO_REPO: /__w/openvino/openvino/openvino + INSTALL_DIR: /__w/openvino/openvino/openvino_install + INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install + BUILD_DIR: /__w/openvino/openvino/openvino_build + CCACHE_REMOTE_DIR: /mount/caches/ccache/ubuntu22_riscv64_xuantie/${{ github.base_ref || github.ref_name }} + CCACHE_DIR: /__w/openvino/openvino/ccache + CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp + CCACHE_MAXSIZE: 2G + if: "!needs.smart_ci.outputs.skip_workflow" + + steps: + - name: Clone OpenVINO + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + 
with: + path: ${{ env.OPENVINO_REPO }} + submodules: 'true' + + # + # Print system info + # + + - name: System info + uses: ./openvino/.github/actions/system_info + + - name: Setup ccache + id: ccache_restore + uses: ./openvino/.github/actions/cache + with: + save-always: ${{ github.event_name == 'push' && 'true' || 'false' }} + cache-size: 10 + max-cache-size: 50 + cache-path: ${{ env.CCACHE_REMOTE_DIR }} + path: ${{ env.CCACHE_DIR }} + key: ${{ runner.os }}-${{ runner.arch }}-ccache-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-${{ runner.arch }}-ccache + + - name: Init XUANTIE + run: | + mkdir ${XUANTIE_DIR} + tar -xvf ${XUANTIE_BIN_PATH}/${XUANTIE_TAR_NAME}.tar -C ${XUANTIE_DIR} + chmod -R +x ${XUANTIE_DIR}/${XUANTIE_TAR_NAME} + + - name: Clean ccache stats + run: ccache --zero-stats + + - name: CMake configure - OpenVINO + run: | + cmake \ + -G "${CMAKE_GENERATOR}" \ + -DENABLE_CPPLINT=OFF \ + -DENABLE_NCC_STYLE=OFF \ + -DENABLE_TESTS=ON \ + -DENABLE_INTEL_CPU=ON \ + -DENABLE_INTEL_GPU=OFF \ + -DENABLE_INTEL_NPU=OFF \ + -DENABLE_SAMPLES=OFF \ + -DCMAKE_TOOLCHAIN_FILE=${OPENVINO_REPO}/cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake \ + -DRISCV_TOOLCHAIN_ROOT=${XUANTIE_DIR}/${XUANTIE_TAR_NAME} \ + -DENABLE_PYTHON=OFF \ + -DENABLE_PYTHON_PACKAGING=ON \ + -DENABLE_WHEEL=OFF \ + -DENABLE_STRICT_DEPENDENCIES=OFF \ + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DCPACK_GENERATOR=TGZ \ + -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ + -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ + -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \ + -S ${OPENVINO_REPO} \ + -B ${BUILD_DIR} + + - name: Cmake build - OpenVINO + run: cmake --build ${BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} + + - name: Show ccache stats + run: ccache --show-stats + + - name: Cmake install - OpenVINO + run: | + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_TEST_DIR} -DCOMPONENT=tests -P ${BUILD_DIR}/cmake_install.cmake + + - name: Pack Artifacts + run: | + + pushd ${INSTALL_DIR} + tar -czvf ${BUILD_DIR}/openvino_package.tar.gz * + popd + + pushd ${INSTALL_TEST_DIR} + tar -czvf ${BUILD_DIR}/openvino_tests.tar.gz * + popd + + # # + # # Upload build artifacts + # # + + - name: Upload openvino package + if: ${{ always() }} + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + with: + name: openvino_package + path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz + if-no-files-found: 'error' + + - name: Upload openvino tests package + if: ${{ always() }} + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + with: + name: openvino_tests + path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz + if-no-files-found: 'error' + + - name: Clean ccache + run: ccache --cleanup + + CPU_Functional_Tests: + name: CPU functional tests + needs: [ Docker, Build, Smart_CI ] + timeout-minutes: 30 + runs-on: aks-linux-4-cores-16gb + container: + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_riscv }} + volumes: + - /mount:/mount + defaults: + run: + shell: bash + env: + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + GTEST_FILTER: ${{ github.event_name == 'workflow_dispatch' && inputs.testFilter || '*smoke_AdaPoolAvg4DLayoutTest*' }} + + steps: + - name: Init XUANTIE + run: | + mkdir ${XUANTIE_DIR} + tar -xvf ${XUANTIE_BIN_PATH}/${XUANTIE_TAR_NAME}.tar -C ${XUANTIE_DIR} + chmod -R +x 
${XUANTIE_DIR}/${XUANTIE_TAR_NAME} + + - name: Download OpenVINO package + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: openvino_package + path: ${{ env.INSTALL_DIR }} + + - name: Download OpenVINO tests package + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: openvino_tests + path: ${{ env.INSTALL_TEST_DIR }} + + # Needed as ${{ github.workspace }} is not working correctly when using Docker + - name: Setup Variables + run: | + echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" + echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" + + - name: Extract OpenVINO packages + run: | + pushd $INSTALL_DIR + tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + popd + + pushd $INSTALL_TEST_DIR + tar -xzf openvino_tests.tar.gz -C $INSTALL_TEST_DIR + popd + + - name: Intel CPU plugin func tests + run: | + # Needed as the Linux CC does not require setupvars to work + if [[ -f "${INSTALL_DIR}/setupvars.sh" ]]; then + source ${INSTALL_DIR}/setupvars.sh + fi + # Needed as ze_loader.so is under INSTALL_TEST_DIR + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${INSTALL_TEST_DIR}/tests + ${XUANTIE_DIR}/${XUANTIE_TAR_NAME}/bin/qemu-riscv64 -cpu c910v ${INSTALL_TEST_DIR}/tests/ov_cpu_func_tests --gtest_filter=${{ env.GTEST_FILTER }} --gtest_print_time=1 + timeout-minutes: 25 + diff --git a/.github/workflows/export_workflow_metrics.yml b/.github/workflows/export_workflow_metrics.yml index 39bb699b8caa91..aef00244f8175b 100644 --- a/.github/workflows/export_workflow_metrics.yml +++ b/.github/workflows/export_workflow_metrics.yml @@ -34,7 +34,7 @@ permissions: read-all jobs: export-workflow-metrics: name: Export finished workflow metrics - runs-on: aks-linux-2-cores-8gb + runs-on: aks-linux-2-cores-8gb-stats if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: diff --git a/.github/workflows/job_jax_layer_tests.yml b/.github/workflows/job_jax_layer_tests.yml new file mode 100644 index 00000000000000..25f171060f43be --- /dev/null +++ b/.github/workflows/job_jax_layer_tests.yml @@ -0,0 +1,133 @@ +name: JAX Layer Tests + +on: + workflow_call: + inputs: + runner: + description: 'Machine on which the tests would run' + type: string + required: true + container: + description: 'JSON to be converted to the value of the "container" configuration for the job' + type: string + required: false + default: '{"image": null}' + affected-components: + description: 'Components that are affected by changes in the commit defined by the Smart CI Action' + type: string + required: true + python-version: + description: 'Python version to setup. 
E.g., "3.11"' + type: string + required: true + +permissions: read-all + +env: + PIP_CACHE_PATH_LINUX: /mount/caches/pip/linux + PIP_CACHE_PATH_WIN: "C:\\mount\\caches\\pip\\win" + +jobs: + JAX_Layer_Tests: + name: JAX Layer Tests + timeout-minutes: 40 + runs-on: ${{ inputs.runner }} + container: ${{ fromJSON(inputs.container) }} + defaults: + run: + shell: ${{ contains(inputs.runner, 'win') && 'pwsh' || 'bash' }} + env: + DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input + OPENVINO_REPO: ${{ github.workspace }}/openvino + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels + LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests + steps: + - name: Download OpenVINO artifacts (tarballs) + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: openvino_[tests]* + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true + + # Needed as ${{ github.workspace }} is not working correctly when using Docker + - name: Setup Variables + if: runner.os != 'Windows' + run: | + echo "OPENVINO_REPO=$GITHUB_WORKSPACE/openvino" >> "$GITHUB_ENV" + echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" + echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" + echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" + echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" + + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz + + - name: Extract OpenVINO packages (Linux, macOS) + if: runner.os != 'Windows' + run: | + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Extract OpenVINO artifacts (Windows) + if: runner.os == 'Windows' + run: | + Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Fetch setup_python and install wheels actions + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + timeout-minutes: 15 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ inputs.python-version }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ inputs.python-version }} + pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH_LINUX || env.PIP_CACHE_PATH_WIN }} + should-setup-pip-paths: ${{ runner.os != 'macOS' }} + self-hosted-runner: ${{ runner.os != 'macOS' }} + + - name: Install OpenVINO Python wheels + uses: ./openvino/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' + + - name: Install JAX Layer tests dependencies + run: | + # jax test requirements + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_jax + + - name: JAX Layer Tests + if: ${{ fromJSON(inputs.affected-components).JAX_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 + run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/jax_tests ${PARALLEL} -m 
precommit_jax_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-jax.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP16 + JAX_TRACE_MODE: JAXPR + PARALLEL: ${{ runner.os == 'Windows' && ' ' || '-n logical'}} + + - name: Upload Test Results + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + if: ${{ !cancelled() }} + with: + name: test-results-python-jax-layers + path: | + ${{ env.INSTALL_TEST_DIR }}/TEST*.html + ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + if-no-files-found: 'warn' diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 07155db1016057..57eb07a83aa423 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -89,7 +89,7 @@ jobs: - name: Install JAX tests requirements for precommit run: | - python3 -m pip install -r ${MODEL_HUB_TESTS_INSTALL_DIR}/jax/requirements.txt + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_jax - name: JAX/Flax Models Tests from Hugging Face if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly' }} diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index b04f719c8e296f..e1532d530ff2db 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -162,14 +162,6 @@ jobs: export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/py_frontend_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_py_fontend.xml - - name: JAX Layer Tests - JAX FE - if: ${{ fromJSON(inputs.affected-components).JAX_FE.test && runner.arch != 'ARM64' && runner.os != 'macOS' }} - run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/jax_tests/ -m precommit_jax_fe --junitxml=${INSTALL_TEST_DIR}/TEST-jax_fe.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - JAX_TRACE_MODE: JAXPR - - name: TensorFlow Lite Layer Tests - TFL FE if: fromJSON(inputs.affected-components).TFL_FE.test run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_lite_tests/ -n logical --junitxml=${INSTALL_TEST_DIR}/TEST-tfl_fe.xml diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 66e825e5d5e126..ca1ca6e056e23d 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -202,6 +202,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Docker, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-linux-16-cores-32gb-arm' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 5e4335b8151c02..0fbc20cf19594b 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -356,6 +356,15 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'macos-13' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + 
python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests # if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 855d76973cc2e4..b60daefa442c83 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -355,6 +355,15 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'macos-13-xlarge' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 5aed74bbb242b8..e5c7d25003de1e 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -334,6 +334,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index 25be095e692d35..beac15bfbda97d 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -156,6 +156,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index f1fd0be596baa2..de33f2603d7430 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -499,6 +499,15 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-win-8-cores-16gb' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CXX_Unit_Tests: name: C++ unit tests needs: [ Build, Smart_CI ] diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml index 0d8d6610bea588..535101ec943264 100644 --- a/.github/workflows/workflow_rerunner.yml +++ b/.github/workflows/workflow_rerunner.yml @@ -29,7 +29,7 @@ jobs: name: Rerun Workflow # Run only for the failed workflows in openvinotoolkit org if: ${{ 
github.event.workflow_run.conclusion == 'failure' && github.repository_owner == 'openvinotoolkit' }} - runs-on: aks-linux-2-cores-8gb + runs-on: aks-linux-2-cores-8gb-stats permissions: actions: write contents: read @@ -70,7 +70,7 @@ jobs: rerunner_tests: name: Rerunner Tests if: ${{ github.event_name == 'pull_request' && github.repository_owner == 'openvinotoolkit' }} - runs-on: aks-linux-2-cores-8gb + runs-on: aks-linux-2-cores-8gb-stats steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -96,9 +96,9 @@ jobs: working-directory: ${{ github.workspace }}/.github/scripts/workflow_rerun run: | export PYTHONPATH=${{ github.workspace }}/.github/scripts/workflow_rerun:${{ github.workspace }}/.github/scripts:$PYTHONPATH - + # Need to get a run id with successful status for log analyzing # cannot lock a run id as logs get deleted after some time run_id=$(python3 -c "from github import Github, Auth; import os; github=Github(auth=Auth.Token(token=os.environ.get('GITHUB_TOKEN'))); repo = github.get_repo('${GITHUB_REPOSITORY}'); run_id = repo.get_workflow_runs(status='success')[0].id; print(run_id)") - + python3 rerunner.py --repository-name ${GITHUB_REPOSITORY} --run-id $run_id --dry-run diff --git a/cmake/developer_package/compile_flags/os_flags.cmake b/cmake/developer_package/compile_flags/os_flags.cmake index 660fd6160893ae..e75c6851ad0f7b 100644 --- a/cmake/developer_package/compile_flags/os_flags.cmake +++ b/cmake/developer_package/compile_flags/os_flags.cmake @@ -455,6 +455,12 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # Build with multiple processes ov_add_compiler_flags(/MP) + # Workaround for an MSVC compiler issue in some versions of Visual Studio 2022. + # The issue involves a null dereference to a mutex. For details, refer to link https://github.com/microsoft/STL/wiki/Changelog#vs-2022-1710 + if(MSVC AND MSVC_VERSION GREATER_EQUAL 1930 AND MSVC_VERSION LESS 1941) + ov_add_compiler_flags(/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR) + endif() + if(AARCH64 AND NOT MSVC_VERSION LESS 1930) # otherwise, _ARM64_EXTENDED_INTRINSICS is defined, which defines 'mvn' macro ov_add_compiler_flags(/D_ARM64_DISTINCT_NEON_TYPES) diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst index d877cb1768d44d..f4ec275491fa32 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst @@ -6,16 +6,14 @@ models from OpenVINO-supported frameworks may also work properly but have not be **AI Models that run on Intel® Core Ultra™ Processors with OpenVINO™ toolkit:** -.. raw:: html - - - - -.. csv-table:: +.. data-table:: :class: modeldata stripe :name: supportedModelsTable :header-rows: 1 :file: ../../_static/download/supported_models.csv + :data-column-hidden: [] + :data-order: [[ 0, "asc" ]] + :data-page-length: 10 | Marked cells indicate models that passed inference with no errors. 
Empty cells indicate diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst index d27f7626391f46..1bd8f5dae7c634 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst @@ -41,27 +41,36 @@ Data as of OpenVINO 2024.4, 18 Oct. 2024. .. tab-item:: PyTorch - .. csv-table:: + .. data-table:: :class: modeldata stripe - :name: TensorFlow ops + :name: TensorFlow_ops_v1 :header-rows: 1 :file: ../../_static/conformance_files/pytorch_ops.csv + :data-column-hidden: [] + :data-order: [[ 0, "asc" ]] + :data-page-length: 10 .. tab-item:: TensorFlow - .. csv-table:: + .. data-table:: :class: modeldata stripe - :name: TensorFlow ops + :name: TensorFlow_ops_v2 :header-rows: 1 :file: ../../_static/conformance_files/tensorflow_ops.csv + :data-column-hidden: [] + :data-order: [[ 0, "asc" ]] + :data-page-length: 10 .. tab-item:: PaddlePaddle - .. csv-table:: + .. data-table:: :class: modeldata stripe - :name: Paddle ops + :name: Paddle_ops :header-rows: 1 :file: ../../_static/conformance_files/paddlepaddle_ops.csv + :data-column-hidden: [] + :data-order: [[ 0, "asc" ]] + :data-page-length: 10 .. tab-item:: ONNX diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 085a1ff8449151..83581d465df92e 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -8,10 +8,6 @@ The current data is as of OpenVINO 2024.4, 20 Nov. 2024. The tables below list the key performance indicators for inference on built-in GPUs. -.. raw:: html - - - .. tab-set:: @@ -22,7 +18,9 @@ The tables below list the key performance indicators for inference on built-in G :name: supportedModelsTable_V1 :header-rows: 1 :file: ../../_static/benchmarks_files/llm_models_9-288V.csv - :hidden: [3,4,6] + :data-column-hidden: [3,4,6] + :data-order: [[ 0, "asc" ]] + :data-page-length: 10 .. tab-item:: 7-268V @@ -31,7 +29,8 @@ The tables below list the key performance indicators for inference on built-in G :name: supportedModelsTable_V2 :header-rows: 1 :file: ../../_static/benchmarks_files/llm_models_7-258V.csv - :hidden: [3,4,6] + :data-column-hidden: [3,4,6] + :data-order: [[ 0, "asc" ]] .. tab-item:: 7-155H @@ -40,7 +39,8 @@ The tables below list the key performance indicators for inference on built-in G :name: supportedModelsTable_V3 :header-rows: 1 :file: ../../_static/benchmarks_files/llm_models_7-155H.csv - :hidden: [3,4,6] + :data-column-hidden: [3,4,6] + :data-order: [[ 0, "asc" ]] .. grid:: 1 1 2 2 diff --git a/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst b/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst index 6ac806daf0cda0..62cfdf05f2b11f 100644 --- a/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst +++ b/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst @@ -203,6 +203,52 @@ Here is an example of how to convert a model obtained with ``torch.export``: This is an experimental feature. Use it only if you know that you need to. PyTorch version 2.2 is recommended. Dynamic shapes are not supported yet. 
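For reference alongside the docs change that follows: the ``exported_program.pt2`` and ``script_module.pt`` files consumed in the added section can be produced with standard PyTorch APIs. A minimal sketch (the module and file names are illustrative, not taken from the diff):

.. code-block:: py

    import torch

    class AddOne(torch.nn.Module):
        def forward(self, x):
            return x + 1

    # torch.export.export traces the module into an ExportedProgram,
    # and torch.export.save serializes it to a .pt2 archive on disk.
    exported = torch.export.export(AddOne(), (torch.rand(1, 10),))
    torch.export.save(exported, "exported_program.pt2")

    # torch.jit.save stores a ScriptModule in an untraced state, which is
    # why convert_model needs example_input when loading it back.
    scripted = torch.jit.script(AddOne())
    torch.jit.save(scripted, "script_module.pt")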
+Converting a PyTorch Model from Disk +#################################### + +PyTorch provides the capability to save models in two distinct formats: ``torch.jit.ScriptModule`` and ``torch.export.ExportedProgram``. +Both formats can be saved to disk as standalone files, enabling them to be reloaded independently of the original Python code. + +ExportedProgram Format +++++++++++++++++++++++ + +The ``ExportedProgram`` format is saved to disk using `torch.export.save() `__. +Below is an example of how to convert an ``ExportedProgram`` from disk: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + import openvino as ov + ov_model = ov.convert_model('exported_program.pt2') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc exported_program.pt2 + +ScriptModule Format ++++++++++++++++++++ + +`torch.jit.save() `__ serializes a ``ScriptModule`` object to disk. +To convert the serialized ``ScriptModule`` format, run the ``convert_model`` function with the ``example_input`` parameter as follows: + +.. code-block:: py + :force: + + from openvino import convert_model + import torch + + convert_model(input_model='script_module.pt', example_input=torch.rand(1, 10)) + +``example_input`` is a required parameter for the conversion because a ``torch.jit.ScriptModule`` object is always saved in an untraced state on disk. + Exporting a PyTorch Model to ONNX Format ######################################## diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst index 46b541d84d4035..6cc211116d1199 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst @@ -63,19 +63,19 @@ the model precision and the ratio of P-cores and E-cores.
Then the default settings for low-level performance properties on Windows and Linux are as follows: -+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ -| Property | Windows | Linux | -+======================================+=======================================================================+=======================================================================+ -| ``ov::num_streams`` | 1 | 1 | -+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ -| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one numa node | is equal to the number of P-cores or P-cores+E-cores on one numa node | -+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ -| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | -+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ -| ``ov::hint::enable_hyper_threading`` | No | No | -+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ -| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | -+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ ++--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ +| Property | Windows | Linux | ++======================================+====================================================================+====================================================================+ +| ``ov::num_streams`` | 1 | 1 | ++--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ +| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one socket | is equal to the number of P-cores or P-cores+E-cores on one socket | ++--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ +| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | ++--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ +| ``ov::hint::enable_hyper_threading`` | No | No | ++--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ +| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | 
++--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+ .. note:: @@ -96,7 +96,7 @@ Then the default settings for low-level performance properties on Windows and Li Starting from 5th Gen Intel Xeon Processors, new microarchitecture enabled new sub-NUMA clusters feature. A sub-NUMA cluster (SNC) can create two or more localization domains (numa nodes) within a socket by BIOS configuration. - By default OpenVINO with latency hint uses single NUMA node for inference. Although such + By default OpenVINO with latency hint uses single socket for inference. Although such behavior allows to achive best performance for most of the models, there might be corner cases which require manual tuning of ``ov::num_streams`` and ``ov::hint::enable_hyper_threading parameters``. Please find more detail about `Sub-NUMA Clustering `__ diff --git a/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py b/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py index c3e0e81eec3b3a..814517289ce114 100644 --- a/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py +++ b/docs/openvino_sphinx_theme/openvino_sphinx_theme/directives/code.py @@ -11,7 +11,7 @@ import requests import re import json - +import html import csv logger = logging.getLogger(__name__) @@ -147,7 +147,9 @@ class DataTable(Directive): 'file': directives.path, 'class': directives.unchanged, 'name': directives.unchanged, - 'hidden': directives.unchanged + 'data-column-hidden': directives.unchanged, + 'data-page-length': directives.unchanged, + 'data-order': directives.unchanged } def run(self) -> List[Node]: @@ -159,10 +161,12 @@ def run(self) -> List[Node]: csv_node = [] with open(csv_file, 'r') as j: csv_data = list(csv.reader(j)) - class_table_tag = ' class="' + "".join(c for c in str(self.options['class']) + '"') if 'class' in self.options is not None else "" - id_table_tag = ' id="' + "".join(c for c in str(self.options['name']) + '"') if 'name' in self.options is not None else "" - hidden_table_tag = ' data-columns-hidden="' + "".join(c for c in str(self.options['hidden']) + '"') if 'hidden' in self.options is not None else "" - csv_table_html = '' + class_table_tag = f' class="{html.escape(self.options["class"])}"' if "class" in self.options else "" + id_table_tag = f' id="{html.escape(self.options["name"])}"' if "name" in self.options else "" + data_column_hidden_tag = f' data-column-hidden="{html.escape(self.options["data-column-hidden"])}"' if "data-column-hidden" in self.options else "" + data_order_tag = f' data-order="{html.escape(self.options["data-order"])}"' if "data-order" in self.options else "" + data_page_length_tag = f' data-page-length="{html.escape(self.options["data-page-length"])}"' if "data-page-length" in self.options else "" + csv_table_html = f'<table{class_table_tag}{id_table_tag}{data_column_hidden_tag}{data_order_tag}{data_page_length_tag}>' head_rows = 0 head_rows += self.options.get('header-rows', 0) row_count = 0 diff --git a/docs/snippets/CMakeLists.txt b/docs/snippets/CMakeLists.txt index ec1cf14bd0e60b..18d176212a0212 100644 --- a/docs/snippets/CMakeLists.txt +++ b/docs/snippets/CMakeLists.txt @@ -122,12 +122,15 @@ set(TARGET_NAME_PY "ov_integration_snippet_py") cmake_minimum_required(VERSION 3.10) set(CMAKE_CXX_STANDARD 11) -find_package(Python3 REQUIRED) - execute_process( - COMMAND ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')" - OUTPUT_VARIABLE OpenVINO_DIR_PY - ERROR_QUIET - ) +if(NOT
CMAKE_CROSSCOMPILING) + find_package(Python3 QUIET COMPONENTS Interpreter) + if(Python3_Interpreter_FOUND) + execute_process( + COMMAND ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')" + OUTPUT_VARIABLE OpenVINO_DIR_PY + ERROR_QUIET) + endif() +endif() find_package(OpenVINO REQUIRED PATHS "${OpenVINO_DIR_PY}") diff --git a/docs/sphinx_setup/_static/css/custom.css b/docs/sphinx_setup/_static/css/custom.css index de8a05732a4d06..1679f7309da044 100644 --- a/docs/sphinx_setup/_static/css/custom.css +++ b/docs/sphinx_setup/_static/css/custom.css @@ -69,7 +69,7 @@ a#wap_dns { /* Sphinx-design tabs override */ .sd-tab-set>input:checked+label { color: var(--sd-color-black) !important; - background-color: #f8f8f8 !important; + background-color: white !important; border: solid 1px #bdbdbd; border-bottom: solid 0px; margin-bottom: -1px; @@ -96,7 +96,7 @@ a#wap_dns { cursor: pointer; font-size: var(--sd-fontsize-tabs-label); font-weight: 400 !important; - padding: 5px 16px 2px !important; + padding: 5px 16px 0px !important; transition: color 250ms; width: auto; z-index: 1; @@ -110,7 +110,6 @@ a#wap_dns { box-shadow: 0 0 0 0; border: solid 1px var(--sd-color-tabs-overline); border-color: #bdbdbd; - background-color: #f8f8f8; padding-right: 4px; padding-left: 4px; padding-bottom: 6px; diff --git a/docs/sphinx_setup/_static/css/openVinoDataTables.css b/docs/sphinx_setup/_static/css/openVinoDataTables.css index 526aabb6abe15d..bedc0f5206e260 100644 --- a/docs/sphinx_setup/_static/css/openVinoDataTables.css +++ b/docs/sphinx_setup/_static/css/openVinoDataTables.css @@ -6,8 +6,7 @@ div.dt-buttons>.dt-button, div.dt-buttons>div.dt-button-split .dt-button { } div.dt-container .dt-paging .dt-paging-button:hover { - color: white !important; - border: 1px solid #aaa; + border: 1px solid #aaa !important; background:none !important; background-color: var(--bttn-act-bg-hover) !important } @@ -190,10 +189,9 @@ div.dt-container .dt-paging .dt-paging-button { div.dt-container .dt-paging .dt-paging-button.current, div.dt-container .dt-paging .dt-paging-button.current:hover { background: none !important; - background-color: var(--bttn-act-bg-active) !important; + background-color: var(--bttn-sec-border-color) !important; border-color: var(--bttn-act-bg-active) !important; border-radius: 0px !important; - color: white !important; border: 1px !important } table.dataTable thead>tr>th.dt-orderable-asc span.dt-column-order:before, table.dataTable thead>tr>th.dt-orderable-asc span.dt-column-order:after, table.dataTable thead>tr>th.dt-orderable-desc span.dt-column-order:before, table.dataTable thead>tr>th.dt-orderable-desc span.dt-column-order:after, table.dataTable thead>tr>th.dt-ordering-asc span.dt-column-order:before, table.dataTable thead>tr>th.dt-ordering-asc span.dt-column-order:after, table.dataTable thead>tr>th.dt-ordering-desc span.dt-column-order:before, table.dataTable thead>tr>th.dt-ordering-desc span.dt-column-order:after, table.dataTable thead>tr>td.dt-orderable-asc span.dt-column-order:before, table.dataTable thead>tr>td.dt-orderable-asc span.dt-column-order:after, table.dataTable thead>tr>td.dt-orderable-desc span.dt-column-order:before, table.dataTable thead>tr>td.dt-orderable-desc span.dt-column-order:after, table.dataTable thead>tr>td.dt-ordering-asc span.dt-column-order:before, table.dataTable thead>tr>td.dt-ordering-asc span.dt-column-order:after, table.dataTable thead>tr>td.dt-ordering-desc span.dt-column-order:before, table.dataTable 
thead>tr>td.dt-ordering-desc span.dt-column-order:after { diff --git a/docs/sphinx_setup/_static/js/openVinoDataTables.js b/docs/sphinx_setup/_static/js/openVinoDataTables.js index bd56a71533786c..fb3a57d959020c 100644 --- a/docs/sphinx_setup/_static/js/openVinoDataTables.js +++ b/docs/sphinx_setup/_static/js/openVinoDataTables.js @@ -1,16 +1,15 @@ $(document).ready(function () { var columnDefs = []; - var tables = $('table.modeldata'); for (let table of tables) { - var hidden = table.getAttribute('data-columns-hidden'); + var hidden = table.getAttribute('data-column-hidden'); columnDefs = [{ "visible": false, "targets": JSON.parse(hidden) }] $(table).DataTable({ responsive: true, "autoWidth": false, language: { buttons: { - colvisRestore: "Restore default" + colvisRestore: "Restore default selection" } }, lengthMenu: [ diff --git a/docs/sphinx_setup/_templates/layout.html b/docs/sphinx_setup/_templates/layout.html index 0d2331b2c83fe3..a791091e1f13a4 100644 --- a/docs/sphinx_setup/_templates/layout.html +++ b/docs/sphinx_setup/_templates/layout.html @@ -9,6 +9,7 @@ + diff --git a/src/bindings/python/src/openvino/__init__.py b/src/bindings/python/src/openvino/__init__.py index 7643f742e0067d..69c678909b1c9e 100644 --- a/src/bindings/python/src/openvino/__init__.py +++ b/src/bindings/python/src/openvino/__init__.py @@ -7,7 +7,7 @@ # Required for Windows OS platforms # Note: always top-level try: - from openvino.package_utils import _add_openvino_libs_to_search_path + from openvino.utils import _add_openvino_libs_to_search_path _add_openvino_libs_to_search_path() except ImportError: pass @@ -17,47 +17,6 @@ # # This __init__.py forces checking of runtime modules to propagate errors. # # It is not compared with init files from openvino-dev package. # # - -# Openvino pybind bindings -from openvino._pyopenvino import AxisSet -from openvino._pyopenvino import AxisVector -from openvino._pyopenvino import ConstOutput -from openvino._pyopenvino import Coordinate -from openvino._pyopenvino import CoordinateDiff -from openvino._pyopenvino import DiscreteTypeInfo -from openvino._pyopenvino import Extension -from openvino._pyopenvino import ProfilingInfo -from openvino._pyopenvino import RTMap -from openvino._pyopenvino import Version -from openvino._pyopenvino import Symbol -from openvino._pyopenvino import Dimension -from openvino._pyopenvino import Input -from openvino._pyopenvino import Output -from openvino._pyopenvino import Node -from openvino._pyopenvino import Strides -from openvino._pyopenvino import PartialShape -from openvino._pyopenvino import Shape -from openvino._pyopenvino import Layout -from openvino._pyopenvino import Type -from openvino._pyopenvino import Tensor -from openvino._pyopenvino import OVAny -from openvino._pyopenvino import get_batch -from openvino._pyopenvino import set_batch -from openvino._pyopenvino import serialize -from openvino._pyopenvino import shutdown -from openvino._pyopenvino import save_model -from openvino._pyopenvino import layout_helpers -from openvino._pyopenvino import RemoteContext -from openvino._pyopenvino import RemoteTensor -from openvino._pyopenvino import Op - -# Import public classes from _ov_api -from openvino._ov_api import Model -from openvino._ov_api import Core -from openvino._ov_api import CompiledModel -from openvino._ov_api import InferRequest -from openvino._ov_api import AsyncInferQueue - # Import all public modules from openvino import runtime as runtime from openvino import frontend as frontend @@ -67,10 +26,36 @@ from openvino import 
utils as utils from openvino import properties as properties +# Import most important classes and functions from openvino.runtime +from openvino._ov_api import Model +from openvino._ov_api import Core +from openvino._ov_api import CompiledModel +from openvino._ov_api import InferRequest +from openvino._ov_api import AsyncInferQueue + +from openvino.runtime import Symbol +from openvino.runtime import Dimension +from openvino.runtime import Strides +from openvino.runtime import PartialShape +from openvino.runtime import Shape +from openvino.runtime import Layout +from openvino.runtime import Type +from openvino.runtime import Tensor +from openvino.runtime import OVAny + # Helper functions for openvino module -from openvino.utils.data_helpers import tensor_from_file +from openvino.runtime.utils.data_helpers import tensor_from_file from openvino._ov_api import compile_model +from openvino.runtime import get_batch +from openvino.runtime import set_batch +from openvino.runtime import serialize +from openvino.runtime import shutdown +from openvino.runtime import save_model +from openvino.runtime import layout_helpers +from openvino._pyopenvino import RemoteContext +from openvino._pyopenvino import RemoteTensor +from openvino._pyopenvino import Op # Import opsets from openvino import opset1 @@ -95,7 +80,7 @@ from openvino._pyopenvino import VASurfaceTensor # Set version for openvino package -from openvino._pyopenvino import get_version +from openvino.runtime import get_version __version__ = get_version() # Tools diff --git a/src/bindings/python/src/openvino/_ov_api.py b/src/bindings/python/src/openvino/_ov_api.py index da31fab4c95d8e..53d0fa5316498b 100644 --- a/src/bindings/python/src/openvino/_ov_api.py +++ b/src/bindings/python/src/openvino/_ov_api.py @@ -5,7 +5,9 @@ from types import TracebackType from typing import Any, Iterable, Union, Optional, Dict, Type from pathlib import Path +import warnings +import numpy as np from openvino._pyopenvino import Model as ModelBase from openvino._pyopenvino import Core as CoreBase @@ -14,7 +16,7 @@ from openvino._pyopenvino import Tensor from openvino._pyopenvino import Node -from openvino.utils.data_helpers import ( +from openvino.runtime.utils.data_helpers import ( OVDict, _InferRequestWrapper, _data_dispatch, diff --git a/src/bindings/python/src/openvino/frontend/frontend.py b/src/bindings/python/src/openvino/frontend/frontend.py index 6a16d5a573b7d7..4d549d24b4ef7c 100644 --- a/src/bindings/python/src/openvino/frontend/frontend.py +++ b/src/bindings/python/src/openvino/frontend/frontend.py @@ -7,7 +7,7 @@ from openvino._pyopenvino import FrontEnd as FrontEndBase from openvino._pyopenvino import FrontEndManager as FrontEndManagerBase from openvino._pyopenvino import InputModel -from openvino import Model +from openvino.runtime import Model class FrontEnd(FrontEndBase): diff --git a/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py b/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py index 9072598f824939..914f6b2e2ee548 100644 --- a/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py +++ b/src/bindings/python/src/openvino/frontend/jax/jaxpr_decoder.py @@ -6,7 +6,7 @@ import jax.core from openvino.frontend.jax.py_jax_frontend import _FrontEndJaxDecoder as Decoder -from openvino import PartialShape, Type as OVType, OVAny +from openvino.runtime import PartialShape, Type as OVType, OVAny from openvino.frontend.jax.utils import jax_array_to_ov_const, get_ov_type_for_value, \ ivalue_to_constant, param_to_constants diff 
--git a/src/bindings/python/src/openvino/frontend/jax/utils.py b/src/bindings/python/src/openvino/frontend/jax/utils.py index 659677b11d5af8..4535265d6de082 100644 --- a/src/bindings/python/src/openvino/frontend/jax/utils.py +++ b/src/bindings/python/src/openvino/frontend/jax/utils.py @@ -8,7 +8,7 @@ import jax.numpy as jnp import numpy as np from openvino.frontend.jax.passes import filter_element, filter_ivalue, filter_param -from openvino import op, Type as OVType, Shape, OVAny +from openvino.runtime import op, Type as OVType, Shape, OVAny numpy_to_ov_type_map = { np.float32: OVType.f32, diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index 81a2764ee1188d..c448571f1ac17a 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -10,7 +10,7 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType -from openvino import PartialShape, Type as OVType, OVAny, Shape +from openvino.runtime import PartialShape, Type as OVType, OVAny, Shape from openvino.frontend.pytorch.utils import make_constant, fetch_attr, pt_to_ov_type_map, torch_tensor_to_ov_const logger = logging.getLogger(__name__) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py index a9a65781dcb254..9f2ef019769875 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend.py @@ -18,7 +18,7 @@ from torch._decomp import decomposition_table, get_decompositions from openvino.frontend import FrontEndManager -from openvino import Core, Type, PartialShape +from openvino.runtime import Core, Type, PartialShape from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder from openvino.frontend.pytorch.torchdynamo import decompositions from openvino.frontend.pytorch.torchdynamo.decompositions import get_aot_decomposition_list, get_inf_decomposition_list @@ -27,7 +27,7 @@ from openvino.frontend.pytorch.torchdynamo.compile import cached_model_name, openvino_compile_cached_model from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_model_caching, _get_decompositions, _get_aot_autograd -from openvino import Core, Type, PartialShape +from openvino.runtime import Core, Type, PartialShape logger = logging.getLogger(__name__) logger.setLevel(logging.WARNING) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py index c9a772b3feac42..47b3b82806b18b 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/backend_utils.py @@ -5,7 +5,7 @@ # mypy: ignore-errors from typing import Optional, Any -from openvino import Core +from openvino.runtime import Core def _get_device(options) -> Optional[Any]: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py index ca8d5478e76c15..fa446893a05d07 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py 
+++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py @@ -14,7 +14,7 @@ from openvino.frontend import FrontEndManager from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder -from openvino import Core, Type, PartialShape, serialize +from openvino.runtime import Core, Type, PartialShape, serialize from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_config, _is_cache_dir_in_config from typing import Callable, Optional diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py index 7527ad7acb37a4..4f41f7b5a6a9de 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/execute.py @@ -20,7 +20,7 @@ from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder from openvino.frontend.pytorch.torchdynamo.partition import Partitioner from openvino.frontend.pytorch.torchdynamo.compile import openvino_compile -from openvino import Core, Type, PartialShape +from openvino.runtime import Core, Type, PartialShape from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_aot_autograd from typing import Callable, Optional, Any diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index 7bb8073167a654..6d8fdb1658793e 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -6,7 +6,7 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType -from openvino import op, PartialShape, Type as OVType, OVAny +from openvino.runtime import op, PartialShape, Type as OVType, OVAny from openvino.frontend.pytorch.utils import ( ivalue_to_constant, get_value_from_getattr, @@ -15,7 +15,7 @@ convert_quantized_tensor, graph_has_ops, ) -from openvino import opset11 as ops +from openvino.runtime import opset11 as ops from openvino.frontend.pytorch import quantized, patch_model from openvino.frontend.pytorch.module_extension import ModuleExtension diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py b/src/bindings/python/src/openvino/frontend/pytorch/utils.py index 9ba36707037c9e..826d766505fa79 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py @@ -7,8 +7,8 @@ import torch import numpy as np -from openvino import op, Type as OVType, Shape, Tensor -from openvino import opset11 as ops +from openvino.runtime import op, Type as OVType, Shape, Tensor +from openvino.runtime import opset11 as ops def make_constant(*args, **kwargs): diff --git a/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py b/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py index d15262cbc30366..fcedd7a74c2b51 100644 --- a/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py +++ b/src/bindings/python/src/openvino/frontend/tensorflow/node_decoder.py @@ -7,7 +7,7 @@ import numpy as np import tensorflow as tf from openvino.frontend.tensorflow.py_tensorflow_frontend import _FrontEndDecoderBase as DecoderBase -from openvino import PartialShape, Type, OVAny, Tensor +from openvino.runtime 
import PartialShape, Type, OVAny, Tensor def tf_type_to_ov_type(tf_type_int): diff --git a/src/bindings/python/src/openvino/frontend/tensorflow/utils.py b/src/bindings/python/src/openvino/frontend/tensorflow/utils.py index 7de5dc950be53e..74c0dfff92297e 100644 --- a/src/bindings/python/src/openvino/frontend/tensorflow/utils.py +++ b/src/bindings/python/src/openvino/frontend/tensorflow/utils.py @@ -8,7 +8,7 @@ import logging as log import numpy as np import sys -from openvino import PartialShape, Dimension, Type +from openvino.runtime import PartialShape, Dimension, Type from packaging.version import parse, Version from typing import List, Dict, Union diff --git a/src/bindings/python/src/openvino/helpers/packing.py b/src/bindings/python/src/openvino/helpers/packing.py index d0956e09fc6261..796af87402f3a6 100644 --- a/src/bindings/python/src/openvino/helpers/packing.py +++ b/src/bindings/python/src/openvino/helpers/packing.py @@ -5,7 +5,7 @@ import numpy as np from typing import Union -from openvino import Type, Shape +from openvino.runtime import Type, Shape def pack_data(array: np.ndarray, type: Type) -> np.ndarray: diff --git a/src/bindings/python/src/openvino/opset1/ops.py b/src/bindings/python/src/openvino/opset1/ops.py index e264aea304fb1f..edca6c62a0b246 100644 --- a/src/bindings/python/src/openvino/opset1/ops.py +++ b/src/bindings/python/src/openvino/opset1/ops.py @@ -8,17 +8,17 @@ import numpy as np from functools import partial -from openvino import Node, PartialShape, Type +from openvino.runtime import Node, PartialShape, Type from openvino.op import Constant, Parameter, tensor_iterator -from openvino.utils.node_factory import _get_node_factory -from openvino.utils.decorators import binary_op, nameable_op, unary_op -from openvino.utils.input_validation import ( +from openvino.runtime.opset_utils import _get_node_factory +from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op +from openvino.runtime.utils.input_validation import ( check_valid_attributes, is_non_negative_value, is_positive_value, ) -from openvino.utils.node_factory import NodeFactory -from openvino.utils.types import ( +from openvino.runtime.utils.node_factory import NodeFactory +from openvino.runtime.utils.types import ( NodeInput, NumericData, NumericType, diff --git a/src/bindings/python/src/openvino/opset10/ops.py b/src/bindings/python/src/openvino/opset10/ops.py index d0bc3cbf1cba4a..c7b75777484a59 100644 --- a/src/bindings/python/src/openvino/opset10/ops.py +++ b/src/bindings/python/src/openvino/opset10/ops.py @@ -6,10 +6,10 @@ from functools import partial from typing import List, Optional -from openvino import Node -from openvino.utils.node_factory import _get_node_factory -from openvino.utils.decorators import nameable_op -from openvino.utils.types import ( +from openvino.runtime import Node +from openvino.runtime.opset_utils import _get_node_factory +from openvino.runtime.utils.decorators import nameable_op +from openvino.runtime.utils.types import ( NodeInput, as_nodes, as_node, diff --git a/src/bindings/python/src/openvino/opset11/ops.py b/src/bindings/python/src/openvino/opset11/ops.py index 95767b4800db1c..575c99501d2d6c 100644 --- a/src/bindings/python/src/openvino/opset11/ops.py +++ b/src/bindings/python/src/openvino/opset11/ops.py @@ -6,10 +6,10 @@ from functools import partial from typing import List, Optional -from openvino import Node -from openvino.utils.node_factory import _get_node_factory -from openvino.utils.decorators import nameable_op -from openvino.utils.types 
diff --git a/src/bindings/python/src/openvino/opset1/ops.py b/src/bindings/python/src/openvino/opset1/ops.py
index e264aea304fb1f..edca6c62a0b246 100644
--- a/src/bindings/python/src/openvino/opset1/ops.py
+++ b/src/bindings/python/src/openvino/opset1/ops.py
@@ -8,17 +8,17 @@
 import numpy as np
 from functools import partial

-from openvino import Node, PartialShape, Type
+from openvino.runtime import Node, PartialShape, Type
 from openvino.op import Constant, Parameter, tensor_iterator
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
     check_valid_attributes,
     is_non_negative_value,
     is_positive_value,
 )
-from openvino.utils.node_factory import NodeFactory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
     NodeInput,
     NumericData,
     NumericType,
diff --git a/src/bindings/python/src/openvino/opset10/ops.py b/src/bindings/python/src/openvino/opset10/ops.py
index d0bc3cbf1cba4a..c7b75777484a59 100644
--- a/src/bindings/python/src/openvino/opset10/ops.py
+++ b/src/bindings/python/src/openvino/opset10/ops.py
@@ -6,10 +6,10 @@
 from functools import partial
 from typing import List, Optional

-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
     NodeInput,
     as_nodes,
     as_node,
diff --git a/src/bindings/python/src/openvino/opset11/ops.py b/src/bindings/python/src/openvino/opset11/ops.py
index 95767b4800db1c..575c99501d2d6c 100644
--- a/src/bindings/python/src/openvino/opset11/ops.py
+++ b/src/bindings/python/src/openvino/opset11/ops.py
@@ -6,10 +6,10 @@
 from functools import partial
 from typing import List, Optional

-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
     NodeInput,
     as_nodes,
 )
diff --git a/src/bindings/python/src/openvino/opset12/ops.py b/src/bindings/python/src/openvino/opset12/ops.py
index 4b354b1fcff973..928bf4f71a9773 100644
--- a/src/bindings/python/src/openvino/opset12/ops.py
+++ b/src/bindings/python/src/openvino/opset12/ops.py
@@ -6,10 +6,10 @@
 from functools import partial
 from typing import Optional

-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
     NodeInput,
     as_nodes,
     as_node,
diff --git a/src/bindings/python/src/openvino/opset13/ops.py b/src/bindings/python/src/openvino/opset13/ops.py
index 5c6863740120f8..12f0d06b1a28e6 100644
--- a/src/bindings/python/src/openvino/opset13/ops.py
+++ b/src/bindings/python/src/openvino/opset13/ops.py
@@ -11,12 +11,12 @@

 log = logging.getLogger(__name__)

-from openvino import Node, Shape, Type, Output, Tensor
+from openvino.runtime import Node, Shape, Type, Output, Tensor
 from openvino.op import Constant, Result
 from openvino.opset1 import convert_like
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import binary_op, nameable_op, unary_op, overloading
-from openvino.utils.types import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op, overloading
+from openvino.runtime.utils.types import (
     NumericData,
     NodeInput,
     NumericType,
diff --git a/src/bindings/python/src/openvino/opset14/ops.py b/src/bindings/python/src/openvino/opset14/ops.py
index 59e1bfd3e89c6f..fa872d24eb7f1a 100644
--- a/src/bindings/python/src/openvino/opset14/ops.py
+++ b/src/bindings/python/src/openvino/opset14/ops.py
@@ -7,11 +7,11 @@

 from typing import Union, Optional, List

-from openvino import Node, Type
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.types import TensorShape
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import NodeInput, as_node, as_nodes
+from openvino.runtime import Node, Type
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.types import TensorShape
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import NodeInput, as_node, as_nodes

 _get_node_factory_opset14 = partial(_get_node_factory, "opset14")
diff --git a/src/bindings/python/src/openvino/opset15/ops.py b/src/bindings/python/src/openvino/opset15/ops.py
index 97d4419fc4834b..8e6b8bd46d5f7c 100644
--- a/src/bindings/python/src/openvino/opset15/ops.py
+++ b/src/bindings/python/src/openvino/opset15/ops.py
@@ -7,12 +7,12 @@
 from typing import List, Literal, Optional

 import numpy as np
-from openvino import Node, Type
+from openvino.runtime import Node, Type
 from openvino.opset1 import convert_like
 from openvino.opset14 import constant
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import binary_op, nameable_op
-from openvino.utils.types import NodeInput, as_nodes
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op
+from openvino.runtime.utils.types import NodeInput, as_nodes

 _get_node_factory_opset15 = partial(_get_node_factory, "opset15")
diff --git a/src/bindings/python/src/openvino/opset16/ops.py b/src/bindings/python/src/openvino/opset16/ops.py
index e5ebdc7a2a11d6..60656f6d993b6a 100644
--- a/src/bindings/python/src/openvino/opset16/ops.py
+++ b/src/bindings/python/src/openvino/opset16/ops.py
@@ -6,10 +6,10 @@
 from functools import partial
 from typing import Optional

-from openvino import Node
-from openvino.utils.decorators import nameable_op
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.types import NodeInput, as_nodes
+from openvino.runtime import Node
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.types import NodeInput, as_nodes

 _get_node_factory_opset16 = partial(_get_node_factory, "opset16")
diff --git a/src/bindings/python/src/openvino/opset2/ops.py b/src/bindings/python/src/openvino/opset2/ops.py
index f76f608fe9a5c7..45b33f5bc0288b 100644
--- a/src/bindings/python/src/openvino/opset2/ops.py
+++ b/src/bindings/python/src/openvino/opset2/ops.py
@@ -9,17 +9,18 @@
 from functools import partial
 import warnings

-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
 from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
     assert_list_of_ints,
     check_valid_attributes,
     is_non_negative_value,
     is_positive_value,
 )
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
     NodeInput,
     NumericData,
     NumericType,
diff --git a/src/bindings/python/src/openvino/opset3/ops.py b/src/bindings/python/src/openvino/opset3/ops.py
index 1c2c7e309fe919..989f5819acb685 100644
--- a/src/bindings/python/src/openvino/opset3/ops.py
+++ b/src/bindings/python/src/openvino/opset3/ops.py
@@ -8,17 +8,18 @@
 import numpy as np
 from functools import partial

-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
 from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
     assert_list_of_ints,
     check_valid_attributes,
     is_non_negative_value,
     is_positive_value,
 )
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
     NodeInput,
     NumericData,
     NumericType,
diff --git a/src/bindings/python/src/openvino/opset4/ops.py b/src/bindings/python/src/openvino/opset4/ops.py
index e6f3a3a1550937..4f6ba016852b02 100644
--- a/src/bindings/python/src/openvino/opset4/ops.py
+++ b/src/bindings/python/src/openvino/opset4/ops.py
@@ -8,17 +8,18 @@
 import numpy as np
 from functools import partial

-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
 from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
     assert_list_of_ints,
     check_valid_attributes,
     is_non_negative_value,
     is_positive_value,
 )
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
     NodeInput,
     NumericData,
     NumericType,
diff --git a/src/bindings/python/src/openvino/opset5/ops.py b/src/bindings/python/src/openvino/opset5/ops.py
index 9217830752b1d8..20057b78c7c31d 100644
--- a/src/bindings/python/src/openvino/opset5/ops.py
+++ b/src/bindings/python/src/openvino/opset5/ops.py
@@ -8,17 +8,18 @@
 import numpy as np
 from functools import partial

-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
 from openvino.op import Constant, Parameter, loop
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
     assert_list_of_ints,
     check_valid_attributes,
     is_non_negative_value,
     is_positive_value,
 )
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
     NodeInput,
     NumericData,
     NumericType,
diff --git a/src/bindings/python/src/openvino/opset6/ops.py b/src/bindings/python/src/openvino/opset6/ops.py
index 340d0405b4ba23..8020715f20dea3 100644
--- a/src/bindings/python/src/openvino/opset6/ops.py
+++ b/src/bindings/python/src/openvino/opset6/ops.py
@@ -9,13 +9,13 @@

 from functools import partial, singledispatch

-from openvino import Node, Type, PartialShape, Output, Shape
+from openvino.runtime import Node, Type, PartialShape, Output, Shape
 from openvino.op import assign, Constant, Parameter
 from openvino.op import read_value as _read_value
 from openvino.op.util import VariableInfo, Variable
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op, overloading
-from openvino.utils.types import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op, overloading
+from openvino.runtime.utils.types import (
     NodeInput,
     NumericType,
     TensorShape,
diff --git a/src/bindings/python/src/openvino/opset7/ops.py b/src/bindings/python/src/openvino/opset7/ops.py
index e33d266debedf1..59e09b64888eb1 100644
--- a/src/bindings/python/src/openvino/opset7/ops.py
+++ b/src/bindings/python/src/openvino/opset7/ops.py
@@ -7,17 +7,18 @@
 from typing import Callable, Iterable, List, Optional, Set, Union

 import numpy as np
-from openvino import Node, Shape
+from openvino.runtime import Node, Shape
 from openvino.op import Constant, Parameter
-from openvino.utils.decorators import binary_op, nameable_op, unary_op
-from openvino.utils.input_validation import (
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.input_validation import (
     assert_list_of_ints,
     check_valid_attributes,
     is_non_negative_value,
     is_positive_value,
 )
-from openvino.utils.node_factory import NodeFactory, _get_node_factory
-from openvino.utils.types import (
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
     NodeInput,
     NumericData,
     NumericType,
diff --git a/src/bindings/python/src/openvino/opset8/ops.py b/src/bindings/python/src/openvino/opset8/ops.py
index a9a868e7b541d8..6995d55a28a776 100644
--- a/src/bindings/python/src/openvino/opset8/ops.py
+++ b/src/bindings/python/src/openvino/opset8/ops.py
@@ -9,15 +9,15 @@
 import numpy as np
 from openvino.exceptions import UserInputError
 from openvino.op import Constant, Parameter, if_op
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.input_validation import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.input_validation import (
     check_valid_attributes,
     is_non_negative_value,
     is_positive_value,
 )
-from openvino.utils.types import (
+from openvino.runtime.utils.types import (
     NodeInput,
     TensorShape,
     as_node,
diff --git a/src/bindings/python/src/openvino/opset9/ops.py b/src/bindings/python/src/openvino/opset9/ops.py
index e2264845e058dc..a6d45cfd0be2cc 100644
--- a/src/bindings/python/src/openvino/opset9/ops.py
+++ b/src/bindings/python/src/openvino/opset9/ops.py
@@ -7,10 +7,10 @@
 from typing import Optional

 import numpy as np
-from openvino import Node
-from openvino.utils.node_factory import _get_node_factory
-from openvino.utils.decorators import nameable_op
-from openvino.utils.types import (
+from openvino.runtime import Node
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.types import (
     NodeInput,
     as_nodes,
     as_node,
diff --git a/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py b/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py
index 717e945217468c..c14635cc118208 100644
--- a/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py
+++ b/src/bindings/python/src/openvino/preprocess/torchvision/preprocess_converter.py
@@ -5,7 +5,7 @@
 from typing import Callable, Any, Union
 import logging

-import openvino as ov
+import openvino.runtime as ov


 class PreprocessConverter():
diff --git a/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py b/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py
index 5dad42b47da44a..f8b51afd546f57 100644
--- a/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py
+++ b/src/bindings/python/src/openvino/preprocess/torchvision/torchvision_preprocessing.py
@@ -20,10 +20,10 @@
 import torchvision.transforms as transforms
 from torchvision.transforms import InterpolationMode

-import openvino as ov
-import openvino.opset11 as ops
-from openvino import Layout, Type
-from openvino.utils.decorators import custom_preprocess_function
+import openvino.runtime as ov
+import openvino.runtime.opset11 as ops
+from openvino.runtime import Layout, Type
+from openvino.runtime.utils.decorators import custom_preprocess_function

 from openvino.preprocess import PrePostProcessor, ResizeAlgorithm, ColorFormat
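The two torchvision modules above keep their public behaviour; only the `ov` alias and the `custom_preprocess_function` import now resolve through `openvino.runtime`. A hedged sketch of the PrePostProcessor flow these helpers build on (the model path and layout are illustrative):

    import openvino.runtime as ov
    from openvino.preprocess import PrePostProcessor

    core = ov.Core()
    model = core.read_model("model.xml")  # illustrative path

    ppp = PrePostProcessor(model)
    ppp.input().tensor().set_element_type(ov.Type.u8).set_layout(ov.Layout("NHWC"))
    ppp.input().preprocess().convert_element_type(ov.Type.f32)
    model = ppp.build()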
diff --git a/src/bindings/python/src/openvino/runtime/opset_utils.py b/src/bindings/python/src/openvino/runtime/opset_utils.py
new file mode 100644
index 00000000000000..475750e71f87c5
--- /dev/null
+++ b/src/bindings/python/src/openvino/runtime/opset_utils.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional
+import numpy as np
+
+from openvino.runtime import Node
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.utils.node_factory import NodeFactory
+from openvino.runtime.utils.types import (
+    as_node,
+    NodeInput,
+)
+
+
+def _get_node_factory(opset_version: Optional[str] = None) -> NodeFactory:
+    """Return NodeFactory configured to create operators from specified opset version."""
+    if opset_version:
+        return NodeFactory(opset_version)
+    else:
+        return NodeFactory()
diff --git a/src/bindings/python/src/openvino/runtime/opset_utils/__init__.py b/src/bindings/python/src/openvino/runtime/opset_utils/__init__.py
deleted file mode 100644
index 6fb3e5f6f0c950..00000000000000
--- a/src/bindings/python/src/openvino/runtime/opset_utils/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-from openvino.utils.node_factory import _get_node_factory
diff --git a/src/bindings/python/src/openvino/runtime/utils/__init__.py b/src/bindings/python/src/openvino/runtime/utils/__init__.py
index 8447e93a907277..73399ccbed2598 100644
--- a/src/bindings/python/src/openvino/runtime/utils/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/utils/__init__.py
@@ -4,4 +4,4 @@
 """Generic utilities.
 Factor related functions out to separate files."""

-from openvino.utils import numpy_to_c, replace_node, replace_output_update_name
+from openvino._pyopenvino.util import numpy_to_c, replace_node, replace_output_update_name
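The restored `opset_utils` module above is the single place the `opsetN` wrappers obtain their `NodeFactory`. Roughly how it is used (a sketch; the parameter shape is illustrative):

    from openvino.runtime import PartialShape, Type
    from openvino.runtime.op import Parameter
    from openvino.runtime.opset_utils import _get_node_factory

    # Each opsetN/ops.py binds the factory to its opset version once:
    factory = _get_node_factory("opset13")

    # Nodes are then created by type name, input outputs, and attributes:
    data = Parameter(Type.f32, PartialShape([1, 16]))
    relu = factory.create("Relu", [data.output(0)])
    assert relu.get_type_name() == "Relu"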
diff --git a/src/bindings/python/src/openvino/utils/broadcasting.py b/src/bindings/python/src/openvino/runtime/utils/broadcasting.py
similarity index 87%
rename from src/bindings/python/src/openvino/utils/broadcasting.py
rename to src/bindings/python/src/openvino/runtime/utils/broadcasting.py
index 01549625e2c628..9fd13da7728e29 100644
--- a/src/bindings/python/src/openvino/utils/broadcasting.py
+++ b/src/bindings/python/src/openvino/runtime/utils/broadcasting.py
@@ -3,11 +3,14 @@
 # SPDX-License-Identifier: Apache-2.0

 import logging
-from typing import Optional
+from typing import List, Optional

-from openvino import AxisSet
-from openvino.utils.types import (
+from openvino.runtime import AxisSet, Node
+from openvino.runtime.utils.types import (
+    NodeInput,
     TensorShape,
+    get_dtype,
+    make_constant_node,
 )

 log = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/runtime/utils/broadcasting/__init__.py b/src/bindings/python/src/openvino/runtime/utils/broadcasting/__init__.py
deleted file mode 100644
index 3219f239f0ab44..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/broadcasting/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.broadcasting import get_broadcast_axes
diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py
index 282547dd9df79a..a46105efaaeadb 100644
--- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py
@@ -2,7 +2,7 @@
 # Copyright (C) 2018-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-from openvino.utils.data_helpers.data_dispatcher import _data_dispatch
-from openvino.utils.data_helpers.wrappers import tensor_from_file
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper
-from openvino.utils.data_helpers.wrappers import OVDict
+from openvino.runtime.utils.data_helpers.data_dispatcher import _data_dispatch
+from openvino.runtime.utils.data_helpers.wrappers import tensor_from_file
+from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper
+from openvino.runtime.utils.data_helpers.wrappers import OVDict
diff --git a/src/bindings/python/src/openvino/utils/data_helpers/data_dispatcher.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
similarity index 99%
rename from src/bindings/python/src/openvino/utils/data_helpers/data_dispatcher.py
rename to src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
index d4db7cb07b629c..bce10c9c3774ef 100644
--- a/src/bindings/python/src/openvino/utils/data_helpers/data_dispatcher.py
+++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher.py
@@ -8,7 +8,7 @@
 import numpy as np

 from openvino._pyopenvino import ConstOutput, Tensor, Type, RemoteTensor
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper, OVDict
+from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper, OVDict

 ContainerTypes = Union[dict, list, tuple, OVDict]
 ScalarTypes = Union[np.number, int, float]
diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher/__init__.py
deleted file mode 100644
index e0a2d022660dd3..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/data_dispatcher/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-from openvino.utils.data_helpers.data_dispatcher import ContainerTypes
-from openvino.utils.data_helpers.data_dispatcher import ScalarTypes
-from openvino.utils.data_helpers.data_dispatcher import ValidKeys
-
-from openvino.utils.data_helpers.data_dispatcher import is_list_simple_type
-from openvino.utils.data_helpers.data_dispatcher import get_request_tensor
-from openvino.utils.data_helpers.data_dispatcher import value_to_tensor
-from openvino.utils.data_helpers.data_dispatcher import to_c_style
-from openvino.utils.data_helpers.data_dispatcher import normalize_arrays
-from openvino.utils.data_helpers.data_dispatcher import create_shared
-from openvino.utils.data_helpers.data_dispatcher import set_request_tensor
-from openvino.utils.data_helpers.data_dispatcher import update_tensor
-from openvino.utils.data_helpers.data_dispatcher import update_inputs
-from openvino.utils.data_helpers.data_dispatcher import create_copied
-from openvino.utils.data_helpers.data_dispatcher import _data_dispatch
diff --git a/src/bindings/python/src/openvino/utils/data_helpers/wrappers.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py
similarity index 100%
rename from src/bindings/python/src/openvino/utils/data_helpers/wrappers.py
rename to src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py
diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers/__init__.py
deleted file mode 100644
index 22214fd24682da..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-
-from openvino.utils.data_helpers.wrappers import tensor_from_file
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper
-from openvino.utils.data_helpers.wrappers import OVDict
diff --git a/src/bindings/python/src/openvino/utils/decorators.py b/src/bindings/python/src/openvino/runtime/utils/decorators.py
similarity index 98%
rename from src/bindings/python/src/openvino/utils/decorators.py
rename to src/bindings/python/src/openvino/runtime/utils/decorators.py
index 9418c359d129e8..98da1ba4389ef7 100644
--- a/src/bindings/python/src/openvino/utils/decorators.py
+++ b/src/bindings/python/src/openvino/runtime/utils/decorators.py
@@ -6,8 +6,8 @@
 from inspect import signature
 from typing import Any, Callable, Dict, Optional, Union, get_origin, get_args

-from openvino import Node, Output
-from openvino.utils.types import NodeInput, as_node, as_nodes
+from openvino.runtime import Node, Output
+from openvino.runtime.utils.types import NodeInput, as_node, as_nodes


 def _get_name(**kwargs: Any) -> Node:
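`decorators.py` above provides `nameable_op`, `unary_op`, and `binary_op`, the wrappers that let every opset factory function accept a `name=` keyword and forward it to the node's friendly name. A rough illustration of the effect (assuming the usual opset entry points):

    import openvino.runtime.opset13 as ops
    from openvino.runtime import PartialShape, Type

    a = ops.parameter(PartialShape([2, 2]), Type.f32, name="a")
    b = ops.parameter(PartialShape([2, 2]), Type.f32, name="b")

    # nameable_op forwards name= to set_friendly_name() on the new node.
    total = ops.add(a, b, name="total")
    assert total.get_friendly_name() == "total"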
diff --git a/src/bindings/python/src/openvino/runtime/utils/decorators/__init__.py b/src/bindings/python/src/openvino/runtime/utils/decorators/__init__.py
deleted file mode 100644
index bb0bac112d2c5f..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/decorators/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.decorators import _get_name
-from openvino.utils.decorators import _set_node_friendly_name
-from openvino.utils.decorators import nameable_op
-from openvino.utils.decorators import unary_op
-from openvino.utils.decorators import binary_op
-from openvino.utils.decorators import custom_preprocess_function
-from openvino.utils.decorators import MultiMethod
-from openvino.utils.decorators import registry
-from openvino.utils.decorators import overloading
diff --git a/src/bindings/python/src/openvino/utils/input_validation.py b/src/bindings/python/src/openvino/runtime/utils/input_validation.py
similarity index 98%
rename from src/bindings/python/src/openvino/utils/input_validation.py
rename to src/bindings/python/src/openvino/runtime/utils/input_validation.py
index 1de08452e1da9f..e79a16c48581b1 100644
--- a/src/bindings/python/src/openvino/utils/input_validation.py
+++ b/src/bindings/python/src/openvino/runtime/utils/input_validation.py
@@ -9,7 +9,7 @@

 import numpy as np

-from openvino.exceptions import UserInputError
+from openvino.runtime.exceptions import UserInputError

 log = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/runtime/utils/input_validation/__init__.py b/src/bindings/python/src/openvino/runtime/utils/input_validation/__init__.py
deleted file mode 100644
index 0b49e9ea33c40d..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/input_validation/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.input_validation import assert_list_of_ints
-from openvino.utils.input_validation import _check_value
-from openvino.utils.input_validation import check_valid_attribute
-from openvino.utils.input_validation import check_valid_attributes
-from openvino.utils.input_validation import is_positive_value
-from openvino.utils.input_validation import is_non_negative_value
diff --git a/src/bindings/python/src/openvino/utils/node_factory.py b/src/bindings/python/src/openvino/runtime/utils/node_factory.py
similarity index 92%
rename from src/bindings/python/src/openvino/utils/node_factory.py
rename to src/bindings/python/src/openvino/runtime/utils/node_factory.py
index e999ae6988814a..25daf739223dba 100644
--- a/src/bindings/python/src/openvino/utils/node_factory.py
+++ b/src/bindings/python/src/openvino/runtime/utils/node_factory.py
@@ -2,16 +2,17 @@
 # Copyright (C) 2018-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+import logging as log

-from functools import singledispatchmethod
+from functools import partial, singledispatchmethod
 from typing import Any, Dict, List, Optional, Union
 from pathlib import Path

 from openvino._pyopenvino import NodeFactory as _NodeFactory

-from openvino import Node, Output, Extension
+from openvino.runtime import Node, Output, Extension

-from openvino.exceptions import UserInputError
+from openvino.runtime.exceptions import UserInputError

 DEFAULT_OPSET = "opset13"
@@ -124,11 +125,3 @@ def _arguments_as_outputs(arguments: List[Union[Node, Output]]) -> List[Output]:
             else:
                 outputs.extend(argument.outputs())
         return outputs
-
-
-def _get_node_factory(opset_version: Optional[str] = None) -> NodeFactory:
-    """Return NodeFactory configured to create operators from specified opset version."""
-    if opset_version:
-        return NodeFactory(opset_version)
-    else:
-        return NodeFactory()
diff --git a/src/bindings/python/src/openvino/runtime/utils/node_factory/__init__.py b/src/bindings/python/src/openvino/runtime/utils/node_factory/__init__.py
deleted file mode 100644
index 945ea8deb7863c..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/node_factory/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.node_factory import NodeFactory
diff --git a/src/bindings/python/src/openvino/utils/reduction.py b/src/bindings/python/src/openvino/runtime/utils/reduction.py
similarity index 95%
rename from src/bindings/python/src/openvino/utils/reduction.py
rename to src/bindings/python/src/openvino/runtime/utils/reduction.py
index e6be6d0ac9a104..71d0af8de7376e 100644
--- a/src/bindings/python/src/openvino/utils/reduction.py
+++ b/src/bindings/python/src/openvino/runtime/utils/reduction.py
@@ -4,7 +4,7 @@

 from typing import Iterable, Optional

-from openvino import Node
+from openvino.runtime import Node


 def get_reduction_axes(node: Node, reduction_axes: Optional[Iterable[int]]) -> Iterable[int]:
diff --git a/src/bindings/python/src/openvino/runtime/utils/reduction/__init__.py b/src/bindings/python/src/openvino/runtime/utils/reduction/__init__.py
deleted file mode 100644
index a2fbff9e793dca..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/reduction/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.reduction import get_reduction_axes
diff --git a/src/bindings/python/src/openvino/utils/types.py b/src/bindings/python/src/openvino/runtime/utils/types.py
similarity index 97%
rename from src/bindings/python/src/openvino/utils/types.py
rename to src/bindings/python/src/openvino/runtime/utils/types.py
index b3543739741d94..52f1faf8e1e839 100644
--- a/src/bindings/python/src/openvino/utils/types.py
+++ b/src/bindings/python/src/openvino/runtime/utils/types.py
@@ -9,9 +9,9 @@

 import numpy as np

-from openvino.exceptions import OVTypeError
-from openvino import Node, Shape, Output, Type
-from openvino.op import Constant
+from openvino.runtime.exceptions import OVTypeError
+from openvino.runtime import Node, Shape, Output, Type
+from openvino.runtime.op import Constant

 log = logging.getLogger(__name__)
diff --git a/src/bindings/python/src/openvino/runtime/utils/types/__init__.py b/src/bindings/python/src/openvino/runtime/utils/types/__init__.py
deleted file mode 100644
index 4f88d609988e8d..00000000000000
--- a/src/bindings/python/src/openvino/runtime/utils/types/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.types import TensorShape
-from openvino.utils.types import NumericData
-from openvino.utils.types import NumericType
-from openvino.utils.types import ScalarData
-from openvino.utils.types import NodeInput
-
-from openvino.utils.types import openvino_to_numpy_types_map
-from openvino.utils.types import openvino_to_numpy_types_str_map
-from openvino.utils.types import get_element_type
-from openvino.utils.types import get_element_type_str
-from openvino.utils.types import get_dtype
-from openvino.utils.types import get_numpy_ctype
-from openvino.utils.types import get_ndarray
-from openvino.utils.types import get_shape
-from openvino.utils.types import make_constant_node
-from openvino.utils.types import as_node
-from openvino.utils.types import as_nodes
diff --git a/src/bindings/python/src/openvino/package_utils.py b/src/bindings/python/src/openvino/utils.py
similarity index 97%
rename from src/bindings/python/src/openvino/package_utils.py
rename to src/bindings/python/src/openvino/utils.py
index 6aa3f3ed39b556..9890ae9b3e6460 100644
--- a/src/bindings/python/src/openvino/package_utils.py
+++ b/src/bindings/python/src/openvino/utils.py
@@ -21,9 +21,9 @@ def _add_openvino_libs_to_search_path() -> None:
     if os.path.isdir(os.path.join(os.path.dirname(__file__), "libs")):
         # looking for the libs in the pip installation path.
         openvino_libs.append(os.path.join(os.path.dirname(__file__), "libs"))
-    elif os.path.isdir(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, "Library", "bin")):
+    elif os.path.isdir(os.path.join(os.path.dirname(__file__), "..", "..", "..", "Library", "bin")):
         # looking for the libs in the conda installation path
-        openvino_libs.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, "Library", "bin"))
+        openvino_libs.append(os.path.join(os.path.dirname(__file__), "..", "..", "..", "Library", "bin"))
     else:
         # setupvars.bat script set all libs paths to OPENVINO_LIB_PATHS environment variable.
         openvino_libs_installer = os.getenv("OPENVINO_LIB_PATHS")
diff --git a/src/bindings/python/src/openvino/utils/__init__.py b/src/bindings/python/src/openvino/utils/__init__.py
deleted file mode 100644
index 2ccc79d20cce84..00000000000000
--- a/src/bindings/python/src/openvino/utils/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""Generic utilities.
-Factor related functions out to separate files."""
-
-from openvino._pyopenvino.util import numpy_to_c, replace_node, replace_output_update_name
-
-from openvino.package_utils import get_cmake_path
-from openvino.package_utils import deprecated
-from openvino.package_utils import classproperty
-from openvino.package_utils import deprecatedclassproperty
diff --git a/src/bindings/python/src/openvino/utils/data_helpers/__init__.py b/src/bindings/python/src/openvino/utils/data_helpers/__init__.py
deleted file mode 100644
index 282547dd9df79a..00000000000000
--- a/src/bindings/python/src/openvino/utils/data_helpers/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from openvino.utils.data_helpers.data_dispatcher import _data_dispatch
-from openvino.utils.data_helpers.wrappers import tensor_from_file
-from openvino.utils.data_helpers.wrappers import _InferRequestWrapper
-from openvino.utils.data_helpers.wrappers import OVDict
diff --git a/src/common/low_precision_transformations/include/low_precision/low_precision.hpp b/src/common/low_precision_transformations/include/low_precision/low_precision.hpp
index b3b92340303ced..483ec19f10a224 100644
--- a/src/common/low_precision_transformations/include/low_precision/low_precision.hpp
+++ b/src/common/low_precision_transformations/include/low_precision/low_precision.hpp
@@ -56,7 +56,7 @@ class ov::pass::low_precision::MarkupOptimizations : public ov::pass::ModelPass

 class ov::pass::low_precision::TypeRelaxedReplacer : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("TypeRelaxedReplacer", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("low_precision::TypeRelaxedReplacer");
     TypeRelaxedReplacer();
 };
diff --git a/src/common/offline_transformations/include/compress_quantize_weights.hpp b/src/common/offline_transformations/include/compress_quantize_weights.hpp
index 9b2792caf93d47..90a2a434ae346a 100644
--- a/src/common/offline_transformations/include/compress_quantize_weights.hpp
+++ b/src/common/offline_transformations/include/compress_quantize_weights.hpp
@@ -102,6 +102,6 @@ class ov::pass::CompressWeightsWithFakeConvert : public ov::pass::MatcherPass {

 class ov::pass::CompressQuantizeWeights : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("CompressQuantizeWeights", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("CompressQuantizeWeights");
     CompressQuantizeWeights();
 };
diff --git a/src/common/offline_transformations/include/pruning.hpp b/src/common/offline_transformations/include/pruning.hpp
index e573108a89eb86..13e46777fde205 100644
--- a/src/common/offline_transformations/include/pruning.hpp
+++ b/src/common/offline_transformations/include/pruning.hpp
@@ -29,7 +29,7 @@ class Pruning;
 */
 class ov::pass::InitMasks : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("InitMasks", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("InitMasks");
     InitMasks();
 };

@@ -56,7 +56,7 @@ class ov::pass::InitConstMask : public ov::pass::MatcherPass {
 */
 class ov::pass::PropagateMasks : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("PropagateMasks", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("PropagateMasks");
     PropagateMasks();
 };
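From `low_precision.hpp` onwards the patch swaps the generic `OPENVINO_RTTI(..., "0")` declaration for macros specific to the pass kind. A hedged sketch of the resulting declaration pattern for a hypothetical pass pair (names here are illustrative, not part of the patch):

    #include "openvino/pass/graph_rewrite.hpp"

    namespace ov {
    namespace pass {

    // A matcher pass declares its type info via the MatcherPass-specific macro.
    class MyFusion : public ov::pass::MatcherPass {
    public:
        OPENVINO_MATCHER_PASS_RTTI("MyFusion");
        MyFusion() = default;  // a real pass would call register_matcher() here
    };

    // A bundle of matcher passes uses the GraphRewrite-specific macro instead.
    class MyFusions : public ov::pass::GraphRewrite {
    public:
        OPENVINO_GRAPH_REWRITE_RTTI("MyFusions");
        MyFusions() {
            add_matcher<MyFusion>();
        }
    };

    }  // namespace pass
    }  // namespace ov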
diff --git a/src/common/transformations/include/transformations/common_optimizations/adaptive_pool_to_reduce.hpp b/src/common/transformations/include/transformations/common_optimizations/adaptive_pool_to_reduce.hpp
index 06dde1ff0bbf63..be271bc71f4b55 100644
--- a/src/common/transformations/include/transformations/common_optimizations/adaptive_pool_to_reduce.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/adaptive_pool_to_reduce.hpp
@@ -25,6 +25,6 @@ class TRANSFORMATIONS_API AdaptivePoolToReduce;

 class ov::pass::AdaptivePoolToReduce : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("AdaptivePoolToReduce", "0");
+    OPENVINO_MATCHER_PASS_RTTI("AdaptivePoolToReduce");
     AdaptivePoolToReduce();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/add_fake_quantize_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/add_fake_quantize_fusion.hpp
index f63f00a455a117..3dc06301c838f2 100644
--- a/src/common/transformations/include/transformations/common_optimizations/add_fake_quantize_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/add_fake_quantize_fusion.hpp
@@ -27,6 +27,6 @@
 */
 class ov::pass::AddFakeQuantizeFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("AddFakeQuantizeFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("AddFakeQuantizeFusion");
     AddFakeQuantizeFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp b/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp
index 1a7578d3551903..c18f1c96bc41bc 100644
--- a/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp
@@ -17,7 +17,7 @@ namespace pass {

 class TRANSFORMATIONS_API AlignEltwiseInputRanks : public MatcherPass {
 public:
-    OPENVINO_RTTI("AlignEltwiseInputRanks", "0");
+    OPENVINO_MATCHER_PASS_RTTI("AlignEltwiseInputRanks");
     AlignEltwiseInputRanks();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/augru_cell_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/augru_cell_fusion.hpp
index c6773de5ec894f..2d458e5e18f87f 100644
--- a/src/common/transformations/include/transformations/common_optimizations/augru_cell_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/augru_cell_fusion.hpp
@@ -32,6 +32,6 @@ class TRANSFORMATIONS_API AUGRUCellFusion;

 class ov::pass::AUGRUCellFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("AUGRUCellFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("AUGRUCellFusion");
     AUGRUCellFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/batch_to_space_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/batch_to_space_fusion.hpp
index 7b05f721f52400..4ab3210a633798 100644
--- a/src/common/transformations/include/transformations/common_optimizations/batch_to_space_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/batch_to_space_fusion.hpp
@@ -31,6 +31,6 @@ class TRANSFORMATIONS_API BatchToSpaceFusion;

 class ov::pass::BatchToSpaceFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("BatchToSpaceFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("BatchToSpaceFusion");
     BatchToSpaceFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/binarize_weights.hpp b/src/common/transformations/include/transformations/common_optimizations/binarize_weights.hpp
index 3803281478a055..2e975e27b26f97 100644
--- a/src/common/transformations/include/transformations/common_optimizations/binarize_weights.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/binarize_weights.hpp
@@ -76,6 +76,6 @@ class TRANSFORMATIONS_API BinarizeWeights;

 class ov::pass::BinarizeWeights : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("BinarizeWeights", "0");
+    OPENVINO_MATCHER_PASS_RTTI("BinarizeWeights");
     BinarizeWeights();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/broadcast_elementwise_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/broadcast_elementwise_fusion.hpp
index 38ae4799e932a0..0fffa95e62b04c 100644
--- a/src/common/transformations/include/transformations/common_optimizations/broadcast_elementwise_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/broadcast_elementwise_fusion.hpp
@@ -23,6 +23,6 @@ class TRANSFORMATIONS_API BroadcastElementwiseFusion;

 class ov::pass::BroadcastElementwiseFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("BroadcastElementwiseFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("BroadcastElementwiseFusion");
     BroadcastElementwiseFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/broadcast_transition.hpp b/src/common/transformations/include/transformations/common_optimizations/broadcast_transition.hpp
index 089f1472b7c431..ff9b8151e7e47f 100644
--- a/src/common/transformations/include/transformations/common_optimizations/broadcast_transition.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/broadcast_transition.hpp
@@ -24,6 +24,6 @@ class TRANSFORMATIONS_API BroadcastTransition;
 */
 class ov::pass::BroadcastTransition : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("BroadcastTransition", "0");
+    OPENVINO_MATCHER_PASS_RTTI("BroadcastTransition");
     BroadcastTransition();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/clamp_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/clamp_fusion.hpp
index fe966323edbb98..69870870d8758a 100644
--- a/src/common/transformations/include/transformations/common_optimizations/clamp_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/clamp_fusion.hpp
@@ -29,6 +29,6 @@ class TRANSFORMATIONS_API ClampFusion;

 class ov::pass::ClampFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ClampFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ClampFusion");
     ClampFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/compress_float_constants.hpp b/src/common/transformations/include/transformations/common_optimizations/compress_float_constants.hpp
index 77bac5753bd757..2f87e5caa8c483 100644
--- a/src/common/transformations/include/transformations/common_optimizations/compress_float_constants.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/compress_float_constants.hpp
@@ -25,7 +25,7 @@ bool TRANSFORMATIONS_API is_model_optimized(const std::shared_ptr<ov::Model>& model);
 */
 class ov::pass::CompressFloatConstantsImpl : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("CompressFloatConstantsImpl", "0");
+    OPENVINO_MATCHER_PASS_RTTI("CompressFloatConstantsImpl");
     /// @brief Transformation constructor
     /// @param postponed If true then the transformation won't compress the constants
     ///        keeping them in the original type but still will insert Converts. This is
@@ -41,7 +41,7 @@ class ov::pass::CompressFloatConstantsImpl : public ov::pass::MatcherPass {
 */
 class ov::pass::CompressFloatConstants : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("CompressFloatConstants", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("CompressFloatConstants");
     /// @brief Transformation constructor
     /// @param postponed Postponed compression, see ov::pass::CompressFloatConstantsImpl for details.
     CompressFloatConstants(bool postponed = false) {
diff --git a/src/common/transformations/include/transformations/common_optimizations/concat_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/concat_fusion.hpp
index 31b710722bf13f..2642e0cc35c39c 100644
--- a/src/common/transformations/include/transformations/common_optimizations/concat_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/concat_fusion.hpp
@@ -25,6 +25,6 @@ class TRANSFORMATIONS_API ConcatFusion;

 class ov::pass::ConcatFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ConcatFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ConcatFusion");
     ConcatFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/concat_reduce_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/concat_reduce_fusion.hpp
index 4aa5391dd42618..a1edbb5dafd32e 100644
--- a/src/common/transformations/include/transformations/common_optimizations/concat_reduce_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/concat_reduce_fusion.hpp
@@ -24,7 +24,7 @@ class TRANSFORMATIONS_API ConcatReduceFusion;
 */
 class ov::pass::ReplaceConcatReduceByMinOrMax : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ReplaceConcatReduceByMinOrMax", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ReplaceConcatReduceByMinOrMax");
     ReplaceConcatReduceByMinOrMax();
 };

@@ -34,7 +34,7 @@ class ov::pass::ReplaceConcatReduceByMinOrMax : public ov::pass::MatcherPass {
 */
 class ov::pass::PullSqueezeThroughEltwise : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("PullSqueezeThroughEltwise", "0");
+    OPENVINO_MATCHER_PASS_RTTI("PullSqueezeThroughEltwise");
     PullSqueezeThroughEltwise();
 };

@@ -76,6 +76,6 @@ class ov::pass::PullSqueezeThroughEltwise : public ov::pass::MatcherPass {

 class ov::pass::ConcatReduceFusion : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("ConcatReduceFusion", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("ConcatReduceFusion");
     ConcatReduceFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/concat_to_broadcast.hpp b/src/common/transformations/include/transformations/common_optimizations/concat_to_broadcast.hpp
index ddb20338c0b01d..7859673f08e309 100644
--- a/src/common/transformations/include/transformations/common_optimizations/concat_to_broadcast.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/concat_to_broadcast.hpp
@@ -22,6 +22,6 @@ class TRANSFORMATIONS_API ConcatToBroadcast;
 */
 class ov::pass::ConcatToBroadcast : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ConcatToBroadcast", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ConcatToBroadcast");
     ConcatToBroadcast();
-};
\ No newline at end of file
+};
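`ConcatReduceFusion` above is the GraphRewrite flavour: one umbrella pass whose constructor registers `ReplaceConcatReduceByMinOrMax` and `PullSqueezeThroughEltwise`. Roughly how such a bundle is run via the standard pass manager (the function name is illustrative):

    #include "openvino/pass/manager.hpp"
    #include "transformations/common_optimizations/concat_reduce_fusion.hpp"

    void run_concat_reduce_fusion(const std::shared_ptr<ov::Model>& model) {
        ov::pass::Manager manager;
        // Registering the GraphRewrite executes all matcher passes it
        // aggregates in a single traversal of the model.
        manager.register_pass<ov::pass::ConcatReduceFusion>();
        manager.run_passes(model);
    }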
diff --git a/src/common/transformations/include/transformations/common_optimizations/conv_mul_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/conv_mul_fusion.hpp
index 9e2ba5194367ba..0ae7cc67bd4be4 100644
--- a/src/common/transformations/include/transformations/common_optimizations/conv_mul_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/conv_mul_fusion.hpp
@@ -23,24 +23,24 @@ class TRANSFORMATIONS_API GroupConvolutionBackpropDataMultiplyFusion;

 class ov::pass::ConvolutionMultiplyFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ConvolutionMultiplyFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ConvolutionMultiplyFusion");
     ConvolutionMultiplyFusion();
 };

 class ov::pass::GroupConvolutionMultiplyFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("GroupConvolutionMultiplyFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("GroupConvolutionMultiplyFusion");
     GroupConvolutionMultiplyFusion();
 };

 class ov::pass::ConvolutionBackpropDataMultiplyFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ConvolutionBackpropDataMultiplyFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ConvolutionBackpropDataMultiplyFusion");
     ConvolutionBackpropDataMultiplyFusion();
 };

 class ov::pass::GroupConvolutionBackpropDataMultiplyFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("GroupConvolutionBackpropDataMultiplyFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("GroupConvolutionBackpropDataMultiplyFusion");
     GroupConvolutionBackpropDataMultiplyFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/conv_to_binary_conv.hpp b/src/common/transformations/include/transformations/common_optimizations/conv_to_binary_conv.hpp
index a60a15d37be9e7..b87013067ca18c 100644
--- a/src/common/transformations/include/transformations/common_optimizations/conv_to_binary_conv.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/conv_to_binary_conv.hpp
@@ -72,6 +72,6 @@ class TRANSFORMATIONS_API ConvToBinaryConv;
 */
 class ov::pass::ConvToBinaryConv : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ConvToBinaryConv", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ConvToBinaryConv");
     ConvToBinaryConv();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp
index d778c255160281..6982e985016402 100644
--- a/src/common/transformations/include/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp
@@ -27,6 +27,6 @@
 */
 class ov::pass::ConvertNmsGatherPathToUnsigned : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("ConvertNmsGatherPathToUnsigned", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("ConvertNmsGatherPathToUnsigned");
     ConvertNmsGatherPathToUnsigned();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp
index 7c0b6b5be95d5d..7dc5639d5e7cf4 100644
--- a/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp
@@ -31,6 +31,6 @@ class TRANSFORMATIONS_API ConvertQuantizeDequantize;

 class ov::pass::ConvertQuantizeDequantize : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ConvertQuantizeDequantize", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ConvertQuantizeDequantize");
     ConvertQuantizeDequantize();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp
index dfd9eef8069665..f20716cbdc7023 100644
--- a/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp
@@ -21,6 +21,6 @@ class TRANSFORMATIONS_API ConvertU4WeightsZeroPointToScalar;
 */
 class ov::pass::ConvertU4WeightsZeroPointToScalar : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ConvertU4WeightsZeroPointToScalar", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ConvertU4WeightsZeroPointToScalar");
     ConvertU4WeightsZeroPointToScalar();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/convolution_to_group_convolution_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/convolution_to_group_convolution_fusion.hpp
index a99d4ea801d8ec..b6b909f23afc18 100644
--- a/src/common/transformations/include/transformations/common_optimizations/convolution_to_group_convolution_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/convolution_to_group_convolution_fusion.hpp
@@ -24,7 +24,7 @@ namespace pass {
 */
 class TRANSFORMATIONS_API ConvolutionToGroupConvolutionFusion : public MatcherPass {
 public:
-    OPENVINO_RTTI("ConvolutionToGroupConvolutionFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ConvolutionToGroupConvolutionFusion");
     ConvolutionToGroupConvolutionFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/depth_to_space_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/depth_to_space_fusion.hpp
index 53e6b623d67d5a..2aa5b8f0efa8c1 100644
--- a/src/common/transformations/include/transformations/common_optimizations/depth_to_space_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/depth_to_space_fusion.hpp
@@ -42,6 +42,6 @@ class TRANSFORMATIONS_API DepthToSpaceFusion;

 class ov::pass::DepthToSpaceFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("DepthToSpaceFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("DepthToSpaceFusion");
     DepthToSpaceFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/dilated_convolution_converter.hpp b/src/common/transformations/include/transformations/common_optimizations/dilated_convolution_converter.hpp
index 204190cafd305c..f946a1792c0323 100644
--- a/src/common/transformations/include/transformations/common_optimizations/dilated_convolution_converter.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/dilated_convolution_converter.hpp
@@ -29,6 +29,6 @@ class TRANSFORMATIONS_API DilatedConvolutionConverter;

 class ov::pass::DilatedConvolutionConverter : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("DilatedConvolutionConverter", "0");
+    OPENVINO_MATCHER_PASS_RTTI("DilatedConvolutionConverter");
     DilatedConvolutionConverter();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp b/src/common/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp
index 405e8ff7288e5d..de0840c0dce97f 100644
--- a/src/common/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp
@@ -24,6 +24,6 @@ class TRANSFORMATIONS_API DisableRandomUniformConstantFolding;
 */
 class ov::pass::DisableRandomUniformConstantFolding : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("DisableRandomUniformConstantFolding", "0");
+    OPENVINO_MATCHER_PASS_RTTI("DisableRandomUniformConstantFolding");
     DisableRandomUniformConstantFolding();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/disable_shapeof_constant_folding.hpp b/src/common/transformations/include/transformations/common_optimizations/disable_shapeof_constant_folding.hpp
index 1526fea966347d..912c21a29ba7c6 100644
--- a/src/common/transformations/include/transformations/common_optimizations/disable_shapeof_constant_folding.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/disable_shapeof_constant_folding.hpp
@@ -19,6 +19,6 @@ class TRANSFORMATIONS_API DisableShapeOfConstantFolding;

 class ov::pass::DisableShapeOfConstantFolding : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("DisableShapeOfConstantFolding", "0");
+    OPENVINO_MATCHER_PASS_RTTI("DisableShapeOfConstantFolding");
     explicit DisableShapeOfConstantFolding(bool check_shape = true);
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/divide_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/divide_fusion.hpp
index 37a5b1f66a5551..fb9f8d5b122110 100644
--- a/src/common/transformations/include/transformations/common_optimizations/divide_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/divide_fusion.hpp
@@ -24,6 +24,6 @@ class TRANSFORMATIONS_API DivideFusion;
 */
 class ov::pass::DivideFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("DivideFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("DivideFusion");
     DivideFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp b/src/common/transformations/include/transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp
index 2ad17f308d46bc..6fdfaa7cc7caa1 100644
--- a/src/common/transformations/include/transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp
@@ -36,6 +36,6 @@ class TRANSFORMATIONS_API DropoutWithRandomUniformReplacer;
 */
 class ov::pass::DropoutWithRandomUniformReplacer : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("DropoutWithRandomUniformReplacer", "0");
+    OPENVINO_MATCHER_PASS_RTTI("DropoutWithRandomUniformReplacer");
     DropoutWithRandomUniformReplacer();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/eliminate_duplicate_ti_inputs.hpp b/src/common/transformations/include/transformations/common_optimizations/eliminate_duplicate_ti_inputs.hpp
index 058daba182b8a6..d9b7db1d08519b 100644
--- a/src/common/transformations/include/transformations/common_optimizations/eliminate_duplicate_ti_inputs.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/eliminate_duplicate_ti_inputs.hpp
@@ -26,6 +26,6 @@ class TRANSFORMATIONS_API EliminateDuplicateTIInputs;

 class ov::pass::EliminateDuplicateTIInputs : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("EliminateDuplicateTIInputs", "0");
+    OPENVINO_MATCHER_PASS_RTTI("EliminateDuplicateTIInputs");
     EliminateDuplicateTIInputs();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/eliminate_loop_inputs_outputs.hpp b/src/common/transformations/include/transformations/common_optimizations/eliminate_loop_inputs_outputs.hpp
index ca58bb504fd811..818e312e91f3d2 100644
--- a/src/common/transformations/include/transformations/common_optimizations/eliminate_loop_inputs_outputs.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/eliminate_loop_inputs_outputs.hpp
@@ -30,6 +30,6 @@ class TRANSFORMATIONS_API EliminateLoopInputsOutputs;

 class ov::pass::EliminateLoopInputsOutputs : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("EliminateLoopInputsOutputs", "0");
+    OPENVINO_MATCHER_PASS_RTTI("EliminateLoopInputsOutputs");
     EliminateLoopInputsOutputs();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp b/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp
index b0ce4581a25569..95f52601c031ab 100644
--- a/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp
@@ -24,7 +24,7 @@ class TRANSFORMATIONS_API EliminateGatherUnsqueeze;

 class ov::pass::EliminateUnsqueezeGather : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("EliminateUnsqueezeGather", "0");
+    OPENVINO_MATCHER_PASS_RTTI("EliminateUnsqueezeGather");
     EliminateUnsqueezeGather();
 };

@@ -38,6 +38,6 @@ class ov::pass::EliminateUnsqueezeGather : public ov::pass::MatcherPass {

 class ov::pass::EliminateGatherUnsqueeze : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("EliminateGatherUnsqueeze", "0");
+    OPENVINO_MATCHER_PASS_RTTI("EliminateGatherUnsqueeze");
     EliminateGatherUnsqueeze();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/enable_shapeof_constant_folding.hpp b/src/common/transformations/include/transformations/common_optimizations/enable_shapeof_constant_folding.hpp
index ab515ce65ac83b..dc81277841570d 100644
--- a/src/common/transformations/include/transformations/common_optimizations/enable_shapeof_constant_folding.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/enable_shapeof_constant_folding.hpp
@@ -17,7 +17,7 @@ namespace pass {
 */
 class TRANSFORMATIONS_API EnableShapeOfConstantFolding : public MatcherPass {
 public:
-    OPENVINO_RTTI("EnableShapeOfConstantFolding", "0");
+    OPENVINO_MATCHER_PASS_RTTI("EnableShapeOfConstantFolding");
     explicit EnableShapeOfConstantFolding(bool check_shape = true);
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/fold_subgraph_empty_inputs.hpp b/src/common/transformations/include/transformations/common_optimizations/fold_subgraph_empty_inputs.hpp
index 8775d93644456e..89024746ee7181 100644
--- a/src/common/transformations/include/transformations/common_optimizations/fold_subgraph_empty_inputs.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/fold_subgraph_empty_inputs.hpp
@@ -34,7 +34,7 @@ TRANSFORMATIONS_API bool fold_subgraph_empty_inputs_is_disabled(const std::shared_ptr<ov::Model>& model);

 class ov::pass::FoldSubgraphEmptyInputs : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("FoldSubgraphEmptyInputs", "0");
+    OPENVINO_MATCHER_PASS_RTTI("FoldSubgraphEmptyInputs");
     FoldSubgraphEmptyInputs();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp
index 3b48e9da740269..d3215c8cb7168d 100644
--- a/src/common/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp
@@ -27,6 +27,6 @@ class TRANSFORMATIONS_API FakeQuantizeMulFusion;

 class ov::pass::FakeQuantizeMulFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("FakeQuantizeMulFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("FakeQuantizeMulFusion");
     FakeQuantizeMulFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/fq_reshape_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/fq_reshape_fusion.hpp
index e4004d794d8ec7..361872e80b1d60 100644
--- a/src/common/transformations/include/transformations/common_optimizations/fq_reshape_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/fq_reshape_fusion.hpp
@@ -27,6 +27,6 @@ class TRANSFORMATIONS_API FakeQuantizeReshapeFusion;

 class ov::pass::FakeQuantizeReshapeFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("FakeQuantizeReshapeFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("FakeQuantizeReshapeFusion");
     FakeQuantizeReshapeFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp b/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp
index 3449151ab93ac5..51177738c1e2d5 100644
--- a/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp
@@ -26,55 +26,55 @@ class TRANSFORMATIONS_API RoPEShareCosSin;

 class ov::pass::RoPEFusionGPTNEOX : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("RoPEFusionGPTNEOX", "0");
+    OPENVINO_MATCHER_PASS_RTTI("RoPEFusionGPTNEOX");
     RoPEFusionGPTNEOX();
 };

 class ov::pass::RoPEFusionFlux : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("RoPEFusionFlux", "0");
+    OPENVINO_MATCHER_PASS_RTTI("RoPEFusionFlux");
     RoPEFusionFlux();
 };

 class ov::pass::RoPEFusionGPTJ : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("RoPEFusionGPTJ", "0");
+    OPENVINO_MATCHER_PASS_RTTI("RoPEFusionGPTJ");
     RoPEFusionGPTJ();
 };

 class ov::pass::RoPEFusionChatGLM : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("RoPEFusionChatGLM", "0");
+    OPENVINO_MATCHER_PASS_RTTI("RoPEFusionChatGLM");
     RoPEFusionChatGLM(int split_output_id, const bool support_2d_rope = false);
 };

 class ov::pass::RoPEFusionQwen : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("RoPEFusionQwen", "0");
+    OPENVINO_MATCHER_PASS_RTTI("RoPEFusionQwen");
     RoPEFusionQwen(int split_output_id);
}; class ov::pass::RoPEFusionIOSlicing : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("RoPEFusionIOSlicing", "0"); + OPENVINO_MATCHER_PASS_RTTI("RoPEFusionIOSlicing"); RoPEFusionIOSlicing(); }; class ov::pass::RoPEFusionPreprocess : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("RoPEFusionPreprocess", "0"); + OPENVINO_MATCHER_PASS_RTTI("RoPEFusionPreprocess"); RoPEFusionPreprocess(); }; class ov::pass::RoPEFusionCosSinPreprocess : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("RoPEFusionCosSinPreprocess", "0"); + OPENVINO_MATCHER_PASS_RTTI("RoPEFusionCosSinPreprocess"); RoPEFusionCosSinPreprocess(); }; class ov::pass::RoPEShareCosSin : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("RoPEShareCosSin", "0"); + OPENVINO_MATCHER_PASS_RTTI("RoPEShareCosSin"); RoPEShareCosSin(); private: @@ -90,7 +90,7 @@ class ov::pass::RoPEShareCosSin : public ov::pass::MatcherPass { */ class ov::pass::RoPEFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("RoPEFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("RoPEFusion"); RoPEFusion(bool support_2d_rope = false) { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp index c3e50b7c992a53..7fd9826b0374be 100644 --- a/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp @@ -31,7 +31,7 @@ class TRANSFORMATIONS_API GeluFusionWithTanhNoPower; */ class ov::pass::GeluFusionWithErfOne : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("GeluFusionWithErfOne", "0"); + OPENVINO_MATCHER_PASS_RTTI("GeluFusionWithErfOne"); GeluFusionWithErfOne(); }; @@ -42,7 +42,7 @@ class ov::pass::GeluFusionWithErfOne : public ov::pass::MatcherPass { */ class ov::pass::GeluFusionWithErfTwo : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("GeluFusionWithErfTwo", "0"); + OPENVINO_MATCHER_PASS_RTTI("GeluFusionWithErfTwo"); GeluFusionWithErfTwo(); }; @@ -53,7 +53,7 @@ class ov::pass::GeluFusionWithErfTwo : public ov::pass::MatcherPass { */ class ov::pass::GeluFusionWithErfThree : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("GeluFusionWithErfThree", "0"); + OPENVINO_MATCHER_PASS_RTTI("GeluFusionWithErfThree"); GeluFusionWithErfThree(); }; @@ -64,7 +64,7 @@ class ov::pass::GeluFusionWithErfThree : public ov::pass::MatcherPass { */ class ov::pass::GeluFusionWithErfFour : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("GeluFusionWithErfFour", "0"); + OPENVINO_MATCHER_PASS_RTTI("GeluFusionWithErfFour"); GeluFusionWithErfFour(); }; @@ -75,7 +75,7 @@ class ov::pass::GeluFusionWithErfFour : public ov::pass::MatcherPass { */ class ov::pass::GeluFusionWithTanh : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("GeluFusionWithTanh", "0"); + OPENVINO_MATCHER_PASS_RTTI("GeluFusionWithTanh"); GeluFusionWithTanh(); }; @@ -86,7 +86,7 @@ class ov::pass::GeluFusionWithTanh : public ov::pass::MatcherPass { */ class ov::pass::GeluFusionWithTanhNoPower : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("GeluFusionWithTanhNoPower", "0"); + OPENVINO_MATCHER_PASS_RTTI("GeluFusionWithTanhNoPower"); GeluFusionWithTanhNoPower(); }; @@ -96,7 +96,7 @@ class ov::pass::GeluFusionWithTanhNoPower : public ov::pass::MatcherPass { */ class ov::pass::GeluFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("GeluFusion", "0"); + 
OPENVINO_GRAPH_REWRITE_RTTI("GeluFusion"); GeluFusion() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/glu_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/glu_fusion.hpp index 7ec71a05027d80..e55a76e031f8f6 100644 --- a/src/common/transformations/include/transformations/common_optimizations/glu_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/glu_fusion.hpp @@ -13,7 +13,7 @@ namespace pass { class TRANSFORMATIONS_API GLUFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("GLUFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("GLUFusion"); GLUFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/gru_cell_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/gru_cell_fusion.hpp index 50bc5ac370a74c..12031de6fc1c92 100644 --- a/src/common/transformations/include/transformations/common_optimizations/gru_cell_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/gru_cell_fusion.hpp @@ -35,6 +35,6 @@ class TRANSFORMATIONS_API GRUCellFusion; class ov::pass::GRUCellFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("GRUCellFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("GRUCellFusion"); GRUCellFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/hsigmoid_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/hsigmoid_fusion.hpp index 04841837a5a76b..5b301246c7a541 100644 --- a/src/common/transformations/include/transformations/common_optimizations/hsigmoid_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/hsigmoid_fusion.hpp @@ -29,7 +29,7 @@ class TRANSFORMATIONS_API HSigmoidFusionWithClampDiv; */ class ov::pass::HSigmoidFusionWithReluDiv : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("HSigmoidFusionWithReluDiv", "0"); + OPENVINO_MATCHER_PASS_RTTI("HSigmoidFusionWithReluDiv"); HSigmoidFusionWithReluDiv(); }; @@ -39,7 +39,7 @@ class ov::pass::HSigmoidFusionWithReluDiv : public ov::pass::MatcherPass { */ class ov::pass::HSigmoidFusionWithReluMul : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("HSigmoidFusionWithReluMul", "0"); + OPENVINO_MATCHER_PASS_RTTI("HSigmoidFusionWithReluMul"); HSigmoidFusionWithReluMul(); }; @@ -49,7 +49,7 @@ class ov::pass::HSigmoidFusionWithReluMul : public ov::pass::MatcherPass { */ class ov::pass::HSigmoidFusionWithoutRelu : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("HSigmoidFusionWithoutRelu", "0"); + OPENVINO_MATCHER_PASS_RTTI("HSigmoidFusionWithoutRelu"); HSigmoidFusionWithoutRelu(); }; @@ -59,7 +59,7 @@ class ov::pass::HSigmoidFusionWithoutRelu : public ov::pass::MatcherPass { */ class ov::pass::HSigmoidFusionWithClampMul : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("HSigmoidFusionWithClampMul", "0"); + OPENVINO_MATCHER_PASS_RTTI("HSigmoidFusionWithClampMul"); HSigmoidFusionWithClampMul(); }; @@ -69,7 +69,7 @@ class ov::pass::HSigmoidFusionWithClampMul : public ov::pass::MatcherPass { */ class ov::pass::HSigmoidFusionWithClampDiv : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("HSigmoidFusionWithClampDiv", "0"); + OPENVINO_MATCHER_PASS_RTTI("HSigmoidFusionWithClampDiv"); HSigmoidFusionWithClampDiv(); }; @@ -79,7 +79,7 @@ class ov::pass::HSigmoidFusionWithClampDiv : public ov::pass::MatcherPass { */ class ov::pass::HSigmoidFusion : 
public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("HSigmoidFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("HSigmoidFusion"); HSigmoidFusion() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/hswish_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/hswish_fusion.hpp index ede2769ec278f1..7b1faa990dd360 100644 --- a/src/common/transformations/include/transformations/common_optimizations/hswish_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/hswish_fusion.hpp @@ -28,7 +28,7 @@ class TRANSFORMATIONS_API HSwishFusionWithClamp; */ class ov::pass::HSwishFusionWithReluDiv : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("HSwishFusionWithReluDiv", "0"); + OPENVINO_MATCHER_PASS_RTTI("HSwishFusionWithReluDiv"); HSwishFusionWithReluDiv(); }; @@ -38,7 +38,7 @@ class ov::pass::HSwishFusionWithReluDiv : public ov::pass::MatcherPass { */ class ov::pass::HSwishFusionWithReluMul : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("HSwishFusionWithReluMul", "0"); + OPENVINO_MATCHER_PASS_RTTI("HSwishFusionWithReluMul"); HSwishFusionWithReluMul(); }; @@ -48,7 +48,7 @@ class ov::pass::HSwishFusionWithReluMul : public ov::pass::MatcherPass { */ class ov::pass::HSwishFusionWithHSigmoid : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("HSwishFusionWithHSigmoid", "0"); + OPENVINO_MATCHER_PASS_RTTI("HSwishFusionWithHSigmoid"); HSwishFusionWithHSigmoid(); }; @@ -58,7 +58,7 @@ class ov::pass::HSwishFusionWithHSigmoid : public ov::pass::MatcherPass { */ class ov::pass::HSwishFusionWithClamp : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("HSwishFusionWithClamp", "0"); + OPENVINO_MATCHER_PASS_RTTI("HSwishFusionWithClamp"); HSwishFusionWithClamp(); }; @@ -68,7 +68,7 @@ class ov::pass::HSwishFusionWithClamp : public ov::pass::MatcherPass { */ class ov::pass::HSwishFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("HSwishFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("HSwishFusion"); HSwishFusion() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/interpolate_sequence_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/interpolate_sequence_fusion.hpp index ff7495e5a2d405..af6ba9055b45e1 100644 --- a/src/common/transformations/include/transformations/common_optimizations/interpolate_sequence_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/interpolate_sequence_fusion.hpp @@ -25,6 +25,6 @@ class TRANSFORMATIONS_API InterpolateSequenceFusion; */ class ov::pass::InterpolateSequenceFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("InterpolateSequenceFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("InterpolateSequenceFusion"); InterpolateSequenceFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp index 42abc74e0dbc2a..93765f2e39c2ef 100644 --- a/src/common/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp @@ -26,6 +26,6 @@ class TRANSFORMATIONS_API LeakyReluFusion; class ov::pass::LeakyReluFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("LeakyReluFusion", "0"); + 
OPENVINO_MATCHER_PASS_RTTI("LeakyReluFusion"); LeakyReluFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/lin_op_sequence_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/lin_op_sequence_fusion.hpp index 2e239e40c332b9..cc71a676eb3b60 100644 --- a/src/common/transformations/include/transformations/common_optimizations/lin_op_sequence_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/lin_op_sequence_fusion.hpp @@ -23,19 +23,19 @@ class TRANSFORMATIONS_API MultiplyMultiplyFusion; class ov::pass::AddMultiplyFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("AddMultiplyFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("AddMultiplyFusion"); AddMultiplyFusion(); }; class ov::pass::AddAddFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("AddAddFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("AddAddFusion"); AddAddFusion(); }; class ov::pass::MultiplyMultiplyFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MultiplyMultiplyFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("MultiplyMultiplyFusion"); MultiplyMultiplyFusion(); }; @@ -45,7 +45,7 @@ class ov::pass::MultiplyMultiplyFusion : public ov::pass::MatcherPass { */ class ov::pass::LinOpSequenceFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("LinOpSequenceFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("LinOpSequenceFusion"); LinOpSequenceFusion() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp index 8422ad95f262c6..e31f99fdb63872 100644 --- a/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp @@ -20,6 +20,6 @@ class TRANSFORMATIONS_API LoraSubgraphFusion; class ov::pass::LoraSubgraphFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("LoraSubgraphFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("LoraSubgraphFusion"); LoraSubgraphFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/lstm_cell_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/lstm_cell_fusion.hpp index 947e39edcfd0e0..2acbbf626cd6f3 100644 --- a/src/common/transformations/include/transformations/common_optimizations/lstm_cell_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/lstm_cell_fusion.hpp @@ -24,7 +24,7 @@ class TRANSFORMATIONS_API LSTMCellFusionWithSplitWeights; */ class ov::pass::LSTMCellFusionWithJointWeights : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("LSTMCellFusionWithJointWeights", "0"); + OPENVINO_MATCHER_PASS_RTTI("LSTMCellFusionWithJointWeights"); LSTMCellFusionWithJointWeights(); }; @@ -35,7 +35,7 @@ class ov::pass::LSTMCellFusionWithJointWeights : public ov::pass::MatcherPass { */ class ov::pass::LSTMCellFusionWithSplitWeights : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("LSTMCellFusionWithSplitWeights", "0"); + OPENVINO_MATCHER_PASS_RTTI("LSTMCellFusionWithSplitWeights"); LSTMCellFusionWithSplitWeights(); }; @@ -45,7 +45,7 @@ class ov::pass::LSTMCellFusionWithSplitWeights : public ov::pass::MatcherPass { */ class ov::pass::LSTMCellFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("LSTMCellFusion", 
"0"); + OPENVINO_GRAPH_REWRITE_RTTI("LSTMCellFusion"); LSTMCellFusion() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/mark_rope_input_to_keep_in_mixed_precision.hpp b/src/common/transformations/include/transformations/common_optimizations/mark_rope_input_to_keep_in_mixed_precision.hpp index c555b991de07ba..09db8db879dbeb 100644 --- a/src/common/transformations/include/transformations/common_optimizations/mark_rope_input_to_keep_in_mixed_precision.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/mark_rope_input_to_keep_in_mixed_precision.hpp @@ -27,7 +27,7 @@ namespace pass { class TRANSFORMATIONS_API MarkRopeInputsToKeepInMixedPrecision : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MarkRopeInputsToKeepInMixedPrecision", "0"); + OPENVINO_MATCHER_PASS_RTTI("MarkRopeInputsToKeepInMixedPrecision"); MarkRopeInputsToKeepInMixedPrecision(); private: @@ -35,4 +35,4 @@ class TRANSFORMATIONS_API MarkRopeInputsToKeepInMixedPrecision : public ov::pass }; } // namespace pass -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp b/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp index 7626be5f877527..313f5cd244a32e 100644 --- a/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp @@ -18,7 +18,7 @@ namespace pass { class TRANSFORMATIONS_API MatMulConstTransposesExtraction : public MatcherPass { public: - OPENVINO_RTTI("MatMulConstTransposesExtraction", "0"); + OPENVINO_MATCHER_PASS_RTTI("MatMulConstTransposesExtraction"); MatMulConstTransposesExtraction(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/matmul_multiply_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/matmul_multiply_fusion.hpp index 7994a04be7972d..767f44a8393e74 100644 --- a/src/common/transformations/include/transformations/common_optimizations/matmul_multiply_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/matmul_multiply_fusion.hpp @@ -58,6 +58,6 @@ class TRANSFORMATIONS_API MatMulMultiplyFusion; */ class ov::pass::MatMulMultiplyFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MatMulMultiplyFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("MatMulMultiplyFusion"); MatMulMultiplyFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/mish_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/mish_fusion.hpp index 8dc6e0149c881f..c905ee2f336232 100644 --- a/src/common/transformations/include/transformations/common_optimizations/mish_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/mish_fusion.hpp @@ -25,6 +25,6 @@ class TRANSFORMATIONS_API MishFusion; */ class ov::pass::MishFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MishFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("MishFusion"); MishFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/move_eltwise_up_data_movement.hpp 
diff --git a/src/common/transformations/include/transformations/common_optimizations/move_eltwise_up_data_movement.hpp b/src/common/transformations/include/transformations/common_optimizations/move_eltwise_up_data_movement.hpp
index 4f704b089190a4..dd303ed1bfec45 100644
--- a/src/common/transformations/include/transformations/common_optimizations/move_eltwise_up_data_movement.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/move_eltwise_up_data_movement.hpp
@@ -29,7 +29,7 @@ namespace pass {
 /// └────────────────┘                  └────────────────┘
 class TRANSFORMATIONS_API MoveEltwiseUpThroughDataMovScalar : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("MoveEltwiseUpThroughDataMovScalar", "0");
+    OPENVINO_MATCHER_PASS_RTTI("MoveEltwiseUpThroughDataMovScalar");
     MoveEltwiseUpThroughDataMovScalar(std::vector<DiscreteTypeInfo> allowed_data_movement_ops);
 };
@@ -50,13 +50,13 @@ class TRANSFORMATIONS_API MoveEltwiseUpThroughDataMovScalar : public ov::pass::M
 /// └────────────────┘ └────────────────────┘ └───────────┘ └─────────────┘
 class TRANSFORMATIONS_API MoveEltwiseUpThroughDataMovPerChannel : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("MoveEltwiseUpThroughDataMovPerChannel", "0");
+    OPENVINO_MATCHER_PASS_RTTI("MoveEltwiseUpThroughDataMovPerChannel");
     MoveEltwiseUpThroughDataMovPerChannel();
 };
 
 class TRANSFORMATIONS_API MoveEltwiseUpThroughDataMov : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("MoveEltwiseUpThroughDataMov", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("MoveEltwiseUpThroughDataMov");
     MoveEltwiseUpThroughDataMov(std::vector<DiscreteTypeInfo> allowed_data_movement_ops = get_default_allowed_ops()) {
         this->add_matcher<MoveEltwiseUpThroughDataMovScalar>(allowed_data_movement_ops);
         this->add_matcher<MoveEltwiseUpThroughDataMovPerChannel>();
diff --git a/src/common/transformations/include/transformations/common_optimizations/mul_conv_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/mul_conv_fusion.hpp
index 4fae74debc1014..84fe28e512549c 100644
--- a/src/common/transformations/include/transformations/common_optimizations/mul_conv_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/mul_conv_fusion.hpp
@@ -80,24 +80,24 @@ class TRANSFORMATIONS_API MultiplyGroupConvolutionBackpropDataFusion;
 class ov::pass::MultiplyConvolutionFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("MultiplyConvolutionFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("MultiplyConvolutionFusion");
     MultiplyConvolutionFusion();
 };
 
 class ov::pass::MultiplyGroupConvolutionFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("MultiplyGroupConvolutionFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("MultiplyGroupConvolutionFusion");
     MultiplyGroupConvolutionFusion();
 };
 
 class ov::pass::MultiplyConvolutionBackpropDataFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("MultiplyConvolutionBackpropDataFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("MultiplyConvolutionBackpropDataFusion");
     MultiplyConvolutionBackpropDataFusion();
 };
 
 class ov::pass::MultiplyGroupConvolutionBackpropDataFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("MultiplyGroupConvolutionBackpropDataFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("MultiplyGroupConvolutionBackpropDataFusion");
     MultiplyGroupConvolutionBackpropDataFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/mul_fake_quantize_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/mul_fake_quantize_fusion.hpp
index f66e52f82c6c0e..e5afe2b7ace09c 100644
--- a/src/common/transformations/include/transformations/common_optimizations/mul_fake_quantize_fusion.hpp
+++
b/src/common/transformations/include/transformations/common_optimizations/mul_fake_quantize_fusion.hpp @@ -27,6 +27,6 @@ class TRANSFORMATIONS_API MulFakeQuantizeFusion; */ class ov::pass::MulFakeQuantizeFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MulFakeQuantizeFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("MulFakeQuantizeFusion"); MulFakeQuantizeFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/mvn_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/mvn_fusion.hpp index dc8561d89e18ab..5433a081768090 100644 --- a/src/common/transformations/include/transformations/common_optimizations/mvn_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/mvn_fusion.hpp @@ -28,7 +28,7 @@ class TRANSFORMATIONS_API MVNFusionWithConstantsInside; */ class ov::pass::MVNFusionWithoutConstants : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MVNFusionWithoutConstants", "0"); + OPENVINO_MATCHER_PASS_RTTI("MVNFusionWithoutConstants"); MVNFusionWithoutConstants(); }; @@ -40,7 +40,7 @@ class ov::pass::MVNFusionWithoutConstants : public ov::pass::MatcherPass { */ class ov::pass::MVNFusionWithConstantsInside : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MVNFusionWithConstantsInside", "0"); + OPENVINO_MATCHER_PASS_RTTI("MVNFusionWithConstantsInside"); MVNFusionWithConstantsInside(); }; @@ -50,7 +50,7 @@ class ov::pass::MVNFusionWithConstantsInside : public ov::pass::MatcherPass { */ class ov::pass::MVNFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("MVNFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("MVNFusion"); MVNFusion() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp index 04b45ee6c67cd3..338e48afd77f64 100644 --- a/src/common/transformations/include/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp @@ -25,6 +25,6 @@ class TRANSFORMATIONS_API NearestNeighborUpsamplingFusion; */ class ov::pass::NearestNeighborUpsamplingFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("NearestNeighborUpsamplingFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("NearestNeighborUpsamplingFusion"); NearestNeighborUpsamplingFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/nonzero_horizontal_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/nonzero_horizontal_fusion.hpp index d579cb0c8a9ec1..e0ad6ccb98453a 100644 --- a/src/common/transformations/include/transformations/common_optimizations/nonzero_horizontal_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/nonzero_horizontal_fusion.hpp @@ -23,6 +23,6 @@ class TRANSFORMATIONS_API NonZeroHorizontalFusion; */ class ov::pass::NonZeroHorizontalFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("NonZeroHorizontalFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("NonZeroHorizontalFusion"); NonZeroHorizontalFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp b/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp index 
76190906d157e2..55cb8eeb2cf0c1 100644 --- a/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp @@ -39,7 +39,7 @@ class TRANSFORMATIONS_API PrepareShapeOpsForEliminationAroundBE; */ class ov::pass::EliminateReduceReshape : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateReduceReshape", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateReduceReshape"); EliminateReduceReshape(); }; @@ -49,7 +49,7 @@ class ov::pass::EliminateReduceReshape : public ov::pass::MatcherPass { */ class ov::pass::EliminatePad : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminatePad", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminatePad"); EliminatePad(); }; @@ -59,7 +59,7 @@ class ov::pass::EliminatePad : public ov::pass::MatcherPass { */ class ov::pass::EliminateConvert : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateConvert", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateConvert"); EliminateConvert(); }; @@ -69,7 +69,7 @@ class ov::pass::EliminateConvert : public ov::pass::MatcherPass { */ class ov::pass::EliminateConvertNonZero : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateConvertNonZero", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateConvertNonZero"); EliminateConvertNonZero(); }; @@ -79,7 +79,7 @@ class ov::pass::EliminateConvertNonZero : public ov::pass::MatcherPass { */ class ov::pass::EliminateConcat : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateConcat", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateConcat"); EliminateConcat(); }; @@ -89,7 +89,7 @@ class ov::pass::EliminateConcat : public ov::pass::MatcherPass { */ class ov::pass::EliminateSplit : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateSplit", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateSplit"); EliminateSplit(); }; @@ -99,7 +99,7 @@ class ov::pass::EliminateSplit : public ov::pass::MatcherPass { */ class ov::pass::EliminateSqueeze : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateSqueeze", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateSqueeze"); EliminateSqueeze(); }; @@ -109,7 +109,7 @@ class ov::pass::EliminateSqueeze : public ov::pass::MatcherPass { */ class ov::pass::EliminateUnsqueeze : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateUnsqueeze", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateUnsqueeze"); EliminateUnsqueeze(); }; @@ -119,7 +119,7 @@ class ov::pass::EliminateUnsqueeze : public ov::pass::MatcherPass { */ class ov::pass::EliminateTranspose : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateTranspose", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateTranspose"); EliminateTranspose(); }; @@ -129,7 +129,7 @@ class ov::pass::EliminateTranspose : public ov::pass::MatcherPass { */ class ov::pass::EliminateEltwise : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateEltwise", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateEltwise"); EliminateEltwise(); }; @@ -139,13 +139,13 @@ class ov::pass::EliminateEltwise : public ov::pass::MatcherPass { */ class ov::pass::EliminateScatterUpdate : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateScatterUpdate", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateScatterUpdate"); EliminateScatterUpdate(); }; class ov::pass::NopElimination : public GraphRewrite { public: - OPENVINO_RTTI("NopElimination", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("NopElimination"); NopElimination(bool use_shape_for_elimination = 
true); }; @@ -155,7 +155,7 @@ class ov::pass::NopElimination : public GraphRewrite { */ class ov::pass::EliminateSplitConcat : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateSplitConcat", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateSplitConcat"); EliminateSplitConcat(); }; @@ -165,7 +165,7 @@ class ov::pass::EliminateSplitConcat : public ov::pass::MatcherPass { */ class ov::pass::EliminateNopBroadcast : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateNopBroadcast", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateNopBroadcast"); EliminateNopBroadcast(); }; @@ -177,7 +177,7 @@ class ov::pass::EliminateNopBroadcast : public ov::pass::MatcherPass { */ class ov::pass::EliminateSliceBeforeGatherElements : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateSliceBeforeGatherElements", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateSliceBeforeGatherElements"); EliminateSliceBeforeGatherElements(); }; @@ -188,7 +188,7 @@ class ov::pass::EliminateSliceBeforeGatherElements : public ov::pass::MatcherPas */ class ov::pass::EliminateStridedSlice : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateStridedSlice", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateStridedSlice"); EliminateStridedSlice(); }; @@ -199,7 +199,7 @@ class ov::pass::EliminateStridedSlice : public ov::pass::MatcherPass { */ class ov::pass::EliminateSlice : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateSlice", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateSlice"); EliminateSlice(); }; @@ -210,7 +210,7 @@ class ov::pass::EliminateSlice : public ov::pass::MatcherPass { */ class ov::pass::EliminateStridedSliceByShape : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EliminateStridedSliceByShape", "0"); + OPENVINO_MATCHER_PASS_RTTI("EliminateStridedSliceByShape"); EliminateStridedSliceByShape(); }; @@ -222,6 +222,6 @@ class ov::pass::EliminateStridedSliceByShape : public ov::pass::MatcherPass { */ class ov::pass::PrepareShapeOpsForEliminationAroundBE : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PrepareShapeOpsForEliminationAroundBE", "0"); + OPENVINO_MATCHER_PASS_RTTI("PrepareShapeOpsForEliminationAroundBE"); PrepareShapeOpsForEliminationAroundBE(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/normalize_l2_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/normalize_l2_fusion.hpp index 39355ec6af5ec4..01ac902140f01b 100644 --- a/src/common/transformations/include/transformations/common_optimizations/normalize_l2_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/normalize_l2_fusion.hpp @@ -31,6 +31,6 @@ class TRANSFORMATIONS_API NormalizeL2Fusion; */ class ov::pass::NormalizeL2Fusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("NormalizeL2Fusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("NormalizeL2Fusion"); NormalizeL2Fusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp b/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp index cb642795254791..961c5e893e8119 100644 --- a/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp @@ -71,7 +71,7 @@ class ov::pass::GroupedSliceToVSplitOptimization : public ov::pass::ModelPass { */ class 
ov::pass::SliceSequenceToSingleSlice : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SliceSequenceToSingleSlice", "0"); + OPENVINO_MATCHER_PASS_RTTI("SliceSequenceToSingleSlice"); SliceSequenceToSingleSlice(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/pad_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/pad_fusion.hpp index 799fe0a8a4ea81..628391547da82c 100644 --- a/src/common/transformations/include/transformations/common_optimizations/pad_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/pad_fusion.hpp @@ -30,7 +30,7 @@ class TRANSFORMATIONS_API PadFusionGroupConvolutionBackpropData; */ class ov::pass::PadFusionAvgPool : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PadFusionAvgPool", "0"); + OPENVINO_MATCHER_PASS_RTTI("PadFusionAvgPool"); PadFusionAvgPool(); }; @@ -43,7 +43,7 @@ class ov::pass::PadFusionAvgPool : public ov::pass::MatcherPass { */ class ov::pass::PadFusionConvolution : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PadFusionConvolution", "0"); + OPENVINO_MATCHER_PASS_RTTI("PadFusionConvolution"); PadFusionConvolution(); }; @@ -57,7 +57,7 @@ class ov::pass::PadFusionConvolution : public ov::pass::MatcherPass { */ class ov::pass::PadFusionConvolutionBackpropData : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PadFusionConvolutionBackpropData", "0"); + OPENVINO_MATCHER_PASS_RTTI("PadFusionConvolutionBackpropData"); PadFusionConvolutionBackpropData(); }; @@ -70,7 +70,7 @@ class ov::pass::PadFusionConvolutionBackpropData : public ov::pass::MatcherPass */ class ov::pass::PadFusionGroupConvolution : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PadFusionGroupConvolution", "0"); + OPENVINO_MATCHER_PASS_RTTI("PadFusionGroupConvolution"); PadFusionGroupConvolution(); }; @@ -84,13 +84,13 @@ class ov::pass::PadFusionGroupConvolution : public ov::pass::MatcherPass { */ class ov::pass::PadFusionGroupConvolutionBackpropData : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PadFusionGroupConvolutionBackpropData", "0"); + OPENVINO_MATCHER_PASS_RTTI("PadFusionGroupConvolutionBackpropData"); PadFusionGroupConvolutionBackpropData(); }; class ov::pass::PadFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("PadFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("PadFusion"); PadFusion() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/prelu_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/prelu_fusion.hpp index b2ba7ca9447450..729d32375c9eb1 100644 --- a/src/common/transformations/include/transformations/common_optimizations/prelu_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/prelu_fusion.hpp @@ -41,7 +41,7 @@ class TRANSFORMATIONS_API PReluFusionNegReluMulAdd; */ class ov::pass::PReluFusionNegativeAdd : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PReluFusionNegativeAdd", "0"); + OPENVINO_MATCHER_PASS_RTTI("PReluFusionNegativeAdd"); PReluFusionNegativeAdd(); }; @@ -60,7 +60,7 @@ class ov::pass::PReluFusionNegativeAdd : public ov::pass::MatcherPass { */ class ov::pass::PReluFusionNegativeSub : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PReluFusionNegativeSub", "0"); + OPENVINO_MATCHER_PASS_RTTI("PReluFusionNegativeSub"); PReluFusionNegativeSub(); }; @@ -79,7 +79,7 @@ class ov::pass::PReluFusionNegativeSub : public ov::pass::MatcherPass { */ 
class ov::pass::PReluFusionMultiplyAdd : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PReluFusionMultiplyAdd", "0"); + OPENVINO_MATCHER_PASS_RTTI("PReluFusionMultiplyAdd"); PReluFusionMultiplyAdd(); }; @@ -98,7 +98,7 @@ class ov::pass::PReluFusionMultiplyAdd : public ov::pass::MatcherPass { */ class ov::pass::PReluFusionMultiplySub : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PReluFusionMultiplySub", "0"); + OPENVINO_MATCHER_PASS_RTTI("PReluFusionMultiplySub"); PReluFusionMultiplySub(); }; @@ -119,7 +119,7 @@ class ov::pass::PReluFusionMultiplySub : public ov::pass::MatcherPass { */ class ov::pass::PReluFusionAbsSubMulMulAdd : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PReluFusionAbsSubMulMulAdd", "0"); + OPENVINO_MATCHER_PASS_RTTI("PReluFusionAbsSubMulMulAdd"); PReluFusionAbsSubMulMulAdd(); }; @@ -138,7 +138,7 @@ class ov::pass::PReluFusionAbsSubMulMulAdd : public ov::pass::MatcherPass { */ class ov::pass::PReluFusionNegReluMulAdd : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PReluFusionNegReluMulAdd", "0"); + OPENVINO_MATCHER_PASS_RTTI("PReluFusionNegReluMulAdd"); PReluFusionNegReluMulAdd(); }; @@ -148,7 +148,7 @@ class ov::pass::PReluFusionNegReluMulAdd : public ov::pass::MatcherPass { */ class ov::pass::PReluFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("PReluFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("PReluFusion"); PReluFusion() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/pull_through_reduce.hpp b/src/common/transformations/include/transformations/common_optimizations/pull_through_reduce.hpp index c31902cd478bfa..86f54e9dc03ac5 100644 --- a/src/common/transformations/include/transformations/common_optimizations/pull_through_reduce.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/pull_through_reduce.hpp @@ -25,7 +25,7 @@ class TRANSFORMATIONS_API PullReshapeThroughReduce; */ class ov::pass::PullUnsqueezeThroughReduce : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PullUnsqueezeThroughReduce", "0"); + OPENVINO_MATCHER_PASS_RTTI("PullUnsqueezeThroughReduce"); PullUnsqueezeThroughReduce(); }; @@ -37,7 +37,7 @@ class ov::pass::PullUnsqueezeThroughReduce : public ov::pass::MatcherPass { */ class ov::pass::PullReshapeThroughReduce : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PullReshapeThroughReduce", "0"); + OPENVINO_MATCHER_PASS_RTTI("PullReshapeThroughReduce"); PullReshapeThroughReduce(); }; @@ -49,7 +49,7 @@ class ov::pass::PullReshapeThroughReduce : public ov::pass::MatcherPass { */ class ov::pass::PullThroughReduce : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("PullThroughReduce", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("PullThroughReduce"); PullThroughReduce() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/pull_transpose_through_fq.hpp b/src/common/transformations/include/transformations/common_optimizations/pull_transpose_through_fq.hpp index c4aa71724a07a3..5e92d0eab4247c 100644 --- a/src/common/transformations/include/transformations/common_optimizations/pull_transpose_through_fq.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/pull_transpose_through_fq.hpp @@ -20,6 +20,6 @@ class TRANSFORMATIONS_API PullTransposeThroughFQUp; class ov::pass::PullTransposeThroughFQUp : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("PullTransposeThroughFQUp", "0"); + 
OPENVINO_MATCHER_PASS_RTTI("PullTransposeThroughFQUp"); PullTransposeThroughFQUp(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/random_uniform_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/random_uniform_fusion.hpp index 198666e9a02673..1f4c76ee810612 100644 --- a/src/common/transformations/include/transformations/common_optimizations/random_uniform_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/random_uniform_fusion.hpp @@ -23,6 +23,6 @@ class TRANSFORMATIONS_API RandomUniformFusion; */ class ov::pass::RandomUniformFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("RandomUniformFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("RandomUniformFusion"); RandomUniformFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/reduce_merge.hpp b/src/common/transformations/include/transformations/common_optimizations/reduce_merge.hpp index 20daf9173b87c8..a24ce14c43aaeb 100644 --- a/src/common/transformations/include/transformations/common_optimizations/reduce_merge.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/reduce_merge.hpp @@ -64,6 +64,6 @@ class TRANSFORMATIONS_API ReduceMerge; */ class ov::pass::ReduceMerge : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ReduceMerge", "0"); + OPENVINO_MATCHER_PASS_RTTI("ReduceMerge"); ReduceMerge(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/reduce_reshape_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/reduce_reshape_fusion.hpp index 2827be2cdb5738..b7c5978e1458e1 100644 --- a/src/common/transformations/include/transformations/common_optimizations/reduce_reshape_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/reduce_reshape_fusion.hpp @@ -22,6 +22,6 @@ class TRANSFORMATIONS_API ReduceReshapeFusion; */ class ov::pass::ReduceReshapeFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ReduceReshapeFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("ReduceReshapeFusion"); ReduceReshapeFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/relu_fake_quantize_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/relu_fake_quantize_fusion.hpp index 8d2895b378c774..04ffa7ddb4b8b0 100644 --- a/src/common/transformations/include/transformations/common_optimizations/relu_fake_quantize_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/relu_fake_quantize_fusion.hpp @@ -25,6 +25,6 @@ class TRANSFORMATIONS_API ReluFakeQuantizeFusion; class ov::pass::ReluFakeQuantizeFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ReluFakeQuantizeFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("ReluFakeQuantizeFusion"); ReluFakeQuantizeFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/remove_concat_zero_dim_input.hpp b/src/common/transformations/include/transformations/common_optimizations/remove_concat_zero_dim_input.hpp index 5c746cd4dde987..881fd9cb23e9c3 100644 --- a/src/common/transformations/include/transformations/common_optimizations/remove_concat_zero_dim_input.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/remove_concat_zero_dim_input.hpp @@ -24,7 +24,7 @@ class OPENVINO_API DisableRemoveConcatZeroDimInput; class 
RemoveConcatZeroDimInput : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("RemoveConcatZeroDimInput", "0"); + OPENVINO_MATCHER_PASS_RTTI("RemoveConcatZeroDimInput"); RemoveConcatZeroDimInput(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/remove_filtering_boxes_by_size.hpp b/src/common/transformations/include/transformations/common_optimizations/remove_filtering_boxes_by_size.hpp index 40dfb824d2ece2..0965f06a465770 100644 --- a/src/common/transformations/include/transformations/common_optimizations/remove_filtering_boxes_by_size.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/remove_filtering_boxes_by_size.hpp @@ -21,12 +21,12 @@ class TRANSFORMATIONS_API RemoveFilteringBoxesBySize; class ov::pass::FuseFilteringBoxesBySize : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("FuseFilteringBoxesBySize", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("FuseFilteringBoxesBySize"); FuseFilteringBoxesBySize(); }; class ov::pass::RemoveFilteringBoxesBySize : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("RemoveFilteringBoxesBySize", "0"); + OPENVINO_MATCHER_PASS_RTTI("RemoveFilteringBoxesBySize"); RemoveFilteringBoxesBySize(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/reshape_prelu.hpp b/src/common/transformations/include/transformations/common_optimizations/reshape_prelu.hpp index 9ced2036d9906b..6c5a629a2fa840 100644 --- a/src/common/transformations/include/transformations/common_optimizations/reshape_prelu.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/reshape_prelu.hpp @@ -22,6 +22,6 @@ class TRANSFORMATIONS_API ReshapePRelu; class ov::pass::ReshapePRelu : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ReshapePRelu", "0"); + OPENVINO_MATCHER_PASS_RTTI("ReshapePRelu"); ReshapePRelu(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/reshape_sequence_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/reshape_sequence_fusion.hpp index 5fa22e7feb0fe6..5aaed4a6be32ad 100644 --- a/src/common/transformations/include/transformations/common_optimizations/reshape_sequence_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/reshape_sequence_fusion.hpp @@ -23,6 +23,6 @@ class TRANSFORMATIONS_API ReshapeSequenceFusion; class ov::pass::ReshapeSequenceFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ReshapeSequenceFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("ReshapeSequenceFusion"); ReshapeSequenceFusion(bool use_shape_for_elimination = true); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/rms_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/rms_fusion.hpp index d8cb02a596ab6c..0a63b3cb6e9a7c 100644 --- a/src/common/transformations/include/transformations/common_optimizations/rms_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/rms_fusion.hpp @@ -29,7 +29,7 @@ namespace pass { class RMSFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("RMSFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("RMSFusion"); RMSFusion(bool force_tail_convert = true); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp new file mode 100644 index 
00000000000000..84383b777604ea
--- /dev/null
+++ b/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp
@@ -0,0 +1,60 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/matcher_pass.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+/// This pass transforms the following sub-graph to a single Scaled Dot Product Attention operation.
+/// Before:
+///     ┌───────┐     ┌───────┐    ┌───────┐
+///     │   Q   │     │   K   │    │   V   │
+///     └───┬───┘     └───┬───┘    └───┬───┘
+///         │             │            │
+///         │             │            │
+///     ┌───┴───┐   ┌─────┴──────┐     │
+///     │ MatMul│<──│ Transpose  │     │
+///     └───┬───┘   │ (Optional) │     │
+///         │       └────────────┘     │
+///     ┌───┴───┐   ┌─────────────┐    │
+///     │  Add  │<──│AttentionMask│    │
+///     └───┬───┘   │ (Optional)  │    │
+///         │       └─────────────┘    │
+///     ┌───┴───┐                      │
+///     │Softmax│                      │
+///     └───┬───┘                      │
+///         │                          │
+///     ┌───┴───┐                      │
+///     │ MatMul│<─────────────────────┘
+///     └───┬───┘
+///     ┌───┴───┐
+///     │ Output│
+///     └───────┘
+///
+/// After:
+///     ┌───────┐    ┌───────┐    ┌───────┐    ┌─────────────┐
+///     │   Q   │    │   K   │    │   V   │    │AttentionMask│
+///     └───┬───┘    └───┬───┘    └───┬───┘    └──────┬──────┘
+///         │            │            │               │
+///         │            │            │               │
+///     ┌───┴────────────┴────────────┴───────────────┴─┐
+///     │           ScaledDotProductAttention           │
+///     └────────────────────┬──────────────────────────┘
+///                          │
+///                          │
+///                     ┌────┴────┐
+///                     │ Output  │
+///                     └─────────┘
+class TRANSFORMATIONS_API SDPAFusion : public ov::pass::MatcherPass {
+public:
+    OPENVINO_MATCHER_PASS_RTTI("SDPAFusion", "0");
+    SDPAFusion();
+};
+
+} // namespace pass
+} // namespace ov
diff --git a/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp
new file mode 100644
index 00000000000000..cae0363e785f4e
--- /dev/null
+++ b/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp
@@ -0,0 +1,58 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/matcher_pass.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+/// Merges explicit multiplication by scalar value for Q and K into scale attribute of SDPA op
+/// Before:
+///     ┌───────┐    ┌───────┐    ┌───────┐    ┌─────────────┐    ┌─────────────┐
+///     │   Q   │    │   K   │    │   V   │    │AttentionMask│    │    Scale    │
+///     └───┬───┘    └───┬───┘    └───┬───┘    │ (Optional)  │    │ (Optional)  │
+///         │            │            │        └──────┬──────┘    └──────┬──────┘
+///         │            │            │               │                  │
+///     ┌───┴───┐    ┌───┴───┐        │               │                  │
+///     │  Mul  │    │  Mul  │        │               │                  │
+///     └───┬───┘    └───┬───┘        │               │                  │
+///         │            │            │               │                  │
+///         │            │            │               │                  │
+///     ┌───┴────────────┴────────────┴───────────────┴─┐               │
+///     │           ScaledDotProductAttention           │───────────────┘
+///     └────────────────────┬──────────────────────────┘
+///                          │
+///                          │
+///                     ┌────┴────┐
+///                     │ Output  │
+///                     └─────────┘
+/// After:
+///     ┌───────┐    ┌───────┐    ┌───────┐    ┌─────────────┐    ┌───────┐
+///     │   Q   │    │   K   │    │   V   │    │AttentionMask│    │ Scale │
+///     └───┬───┘    └───┬───┘    └───┬───┘    └──────┬──────┘    └───┬───┘
+///         │            │            │               │               │
+///         │            │            │               │               │
+///     ┌───┴────────────┴────────────┴───────────────┴─┐             │
+///     │           ScaledDotProductAttention           │─────────────┘
+///     └────────────────────┬──────────────────────────┘
+///                          │
+///                          │
+///                     ┌────┴────┐
+///                     │ Output  │
+///                     └─────────┘
+/// Multiply ops for Q and K are eliminated in the following cases:
+/// 1. Q_scale and K_scale are constant
+/// 2. Q_scale * SDPA_Scale == 1 or K_scale * SDPA_Scale == 1
+class TRANSFORMATIONS_API SDPAScaleFusion : public ov::pass::MatcherPass {
+public:
+    OPENVINO_MATCHER_PASS_RTTI("SDPAScaleFusion", "0");
+    SDPAScaleFusion();
+};
+
+} // namespace pass
+} // namespace ov
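Both new passes live in the internal transformations library and would typically be applied through the standard pass manager. A minimal usage sketch, assuming the include paths shown in this diff; the helper name fuse_sdpa and the pass ordering are illustrative, with SDPAScaleFusion run second since it operates on already-formed SDPA ops:

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/sdpa_fusion.hpp"
#include "transformations/common_optimizations/sdpa_scale_fusion.hpp"

void fuse_sdpa(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    // First collapse the MatMul/Softmax/MatMul sub-graph into a single SDPA op,
    // then fold the explicit Q/K scaling Multiplies into its scale input.
    manager.register_pass<ov::pass::SDPAFusion>();
    manager.register_pass<ov::pass::SDPAScaleFusion>();
    manager.run_passes(model);
}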
diff --git a/src/common/transformations/include/transformations/common_optimizations/select_with_one_value_condition.hpp b/src/common/transformations/include/transformations/common_optimizations/select_with_one_value_condition.hpp
index 5278e17d07ff64..1146565efa1b48 100644
--- a/src/common/transformations/include/transformations/common_optimizations/select_with_one_value_condition.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/select_with_one_value_condition.hpp
@@ -26,6 +26,6 @@ class TRANSFORMATIONS_API SelectWithOneValueCondition;
 class ov::pass::SelectWithOneValueCondition : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("SelectWithOneValueCondition", "0");
+    OPENVINO_MATCHER_PASS_RTTI("SelectWithOneValueCondition");
     SelectWithOneValueCondition();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/sequence_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/sequence_fusion.hpp
index ecba41990e28dd..a4f432b8bd584f 100644
--- a/src/common/transformations/include/transformations/common_optimizations/sequence_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/sequence_fusion.hpp
@@ -30,6 +30,6 @@ class TRANSFORMATIONS_API SequenceFusion;
 class ov::pass::SequenceFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("SequenceFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("SequenceFusion");
     SequenceFusion();
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/shuffle_channels_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/shuffle_channels_fusion.hpp
index 2304fe637b4551..b88cbdb64731a0 100644
--- a/src/common/transformations/include/transformations/common_optimizations/shuffle_channels_fusion.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/shuffle_channels_fusion.hpp
@@ -34,6 +34,6 @@ class TRANSFORMATIONS_API ShuffleChannelsFusion;
 class ov::pass::ShuffleChannelsFusion : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ShuffleChannelsFusion", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ShuffleChannelsFusion");
     ShuffleChannelsFusion(const bool reshape_constants_check);
 };
diff --git a/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp b/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp
index 79e0ffd789bf7c..66198e60f3a564 100644
--- a/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp
@@ -31,7 +31,7 @@ class TRANSFORMATIONS_API AbsSinking;
 */
 class ov::pass::GroupedGatherElimination : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("GroupedGatherElimination", "0");
+    OPENVINO_MATCHER_PASS_RTTI("GroupedGatherElimination");
     GroupedGatherElimination();
 };
@@ -55,7 +55,7 @@ class ov::pass::SimplifyShapeOfSubGraph : public ov::pass::ModelPass {
 */
 class ov::pass::GatherNopElimination : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("GatherNopElimination", "0");
+
OPENVINO_MATCHER_PASS_RTTI("GatherNopElimination"); GatherNopElimination(); }; @@ -67,7 +67,7 @@ class ov::pass::GatherNopElimination : public ov::pass::MatcherPass { */ class ov::pass::SimplifyGatherShapeOf : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SimplifyGatherShapeOf", "0"); + OPENVINO_MATCHER_PASS_RTTI("SimplifyGatherShapeOf"); SimplifyGatherShapeOf(); }; @@ -78,7 +78,7 @@ class ov::pass::SimplifyGatherShapeOf : public ov::pass::MatcherPass { */ class ov::pass::SimplifySecondInputOfReshape : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SimplifySecondInputOfReshape", "0"); + OPENVINO_MATCHER_PASS_RTTI("SimplifySecondInputOfReshape"); SimplifySecondInputOfReshape(); }; @@ -90,6 +90,6 @@ class ov::pass::SimplifySecondInputOfReshape : public ov::pass::MatcherPass { */ class ov::pass::AbsSinking : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("AbsSinking", "0"); + OPENVINO_MATCHER_PASS_RTTI("AbsSinking"); AbsSinking(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp b/src/common/transformations/include/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp index a93fdb8f4f20fe..3e937ba217d66c 100644 --- a/src/common/transformations/include/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp @@ -26,6 +26,6 @@ class TRANSFORMATIONS_API SkipGatherBeforeTransposeAndReshape; */ class ov::pass::SkipGatherBeforeTransposeAndReshape : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SkipGatherBeforeTransposeAndReshape", "0"); + OPENVINO_MATCHER_PASS_RTTI("SkipGatherBeforeTransposeAndReshape"); SkipGatherBeforeTransposeAndReshape(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/softmax_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/softmax_fusion.hpp index f56a8bd1b574f7..07524e3799cf64 100644 --- a/src/common/transformations/include/transformations/common_optimizations/softmax_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/softmax_fusion.hpp @@ -103,6 +103,6 @@ class TRANSFORMATIONS_API SoftmaxFusion; class ov::pass::SoftmaxFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SoftmaxFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("SoftmaxFusion"); SoftmaxFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/softplus_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/softplus_fusion.hpp index 44d34dcdcc4c90..3dbadc5c2b6046 100644 --- a/src/common/transformations/include/transformations/common_optimizations/softplus_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/softplus_fusion.hpp @@ -25,6 +25,6 @@ class TRANSFORMATIONS_API SoftPlusFusion; */ class ov::pass::SoftPlusFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SoftPlusFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("SoftPlusFusion"); SoftPlusFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/softplus_to_mish_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/softplus_to_mish_fusion.hpp index 6a6d247e4ea351..cbc0194ad20c62 100644 --- 
a/src/common/transformations/include/transformations/common_optimizations/softplus_to_mish_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/softplus_to_mish_fusion.hpp @@ -25,6 +25,6 @@ class TRANSFORMATIONS_API SoftPlusToMishFusion; */ class ov::pass::SoftPlusToMishFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SoftPlusToMishFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("SoftPlusToMishFusion"); SoftPlusToMishFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/space_to_batch_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/space_to_batch_fusion.hpp index 43cf4654628cf5..aa95f690512b42 100644 --- a/src/common/transformations/include/transformations/common_optimizations/space_to_batch_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/space_to_batch_fusion.hpp @@ -32,6 +32,6 @@ class TRANSFORMATIONS_API SpaceToBatchFusion; class ov::pass::SpaceToBatchFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SpaceToBatchFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("SpaceToBatchFusion"); SpaceToBatchFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.hpp index e9ecba4fe6e961..72887dd32d9008 100644 --- a/src/common/transformations/include/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.hpp @@ -25,6 +25,6 @@ class TRANSFORMATIONS_API SplitConcatPairToInterpolateFusion; */ class ov::pass::SplitConcatPairToInterpolateFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SplitConcatPairToInterpolateFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("SplitConcatPairToInterpolateFusion"); SplitConcatPairToInterpolateFusion(bool use_shape_for_elimination = true); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp index 24073c6a61e2dc..3cd3c9429be0f1 100644 --- a/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp @@ -25,6 +25,6 @@ class TRANSFORMATIONS_API SplitSqueezeConcatFusion; */ class ov::pass::SplitSqueezeConcatFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SplitSqueezeConcatFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("SplitSqueezeConcatFusion"); SplitSqueezeConcatFusion(bool use_shapes); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/strides_optimization.hpp b/src/common/transformations/include/transformations/common_optimizations/strides_optimization.hpp index 57f5036fe5faa7..acdd30580b1a23 100644 --- a/src/common/transformations/include/transformations/common_optimizations/strides_optimization.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/strides_optimization.hpp @@ -28,7 +28,7 @@ class TRANSFORMATIONS_API StridesOptimization; */ class ov::pass::ConvStridesPropagation : public ov::pass::MatcherPass { public: - 
OPENVINO_RTTI("ConvStridesPropagation", "0"); + OPENVINO_MATCHER_PASS_RTTI("ConvStridesPropagation"); ConvStridesPropagation(); }; @@ -40,7 +40,7 @@ class ov::pass::ConvStridesPropagation : public ov::pass::MatcherPass { */ class ov::pass::SupportedNodesStridesPropagation : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SupportedNodesStridesPropagation", "0"); + OPENVINO_MATCHER_PASS_RTTI("SupportedNodesStridesPropagation"); SupportedNodesStridesPropagation(); }; @@ -51,7 +51,7 @@ class ov::pass::SupportedNodesStridesPropagation : public ov::pass::MatcherPass */ class ov::pass::UnsupportedNodesStridesPropagation : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("UnsupportedNodesStridesPropagation", "0"); + OPENVINO_MATCHER_PASS_RTTI("UnsupportedNodesStridesPropagation"); UnsupportedNodesStridesPropagation(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/subtract_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/subtract_fusion.hpp index 69e4095b6becd7..1963f226b830be 100644 --- a/src/common/transformations/include/transformations/common_optimizations/subtract_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/subtract_fusion.hpp @@ -24,6 +24,6 @@ class TRANSFORMATIONS_API SubtractFusion; */ class ov::pass::SubtractFusion : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SubtractFusion", "0"); + OPENVINO_MATCHER_PASS_RTTI("SubtractFusion"); SubtractFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/swish_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/swish_fusion.hpp index a630e1998e84ca..d9a1ae6e321f35 100644 --- a/src/common/transformations/include/transformations/common_optimizations/swish_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/swish_fusion.hpp @@ -28,7 +28,7 @@ class TRANSFORMATIONS_API SwishFusionWithoutBeta; */ class ov::pass::SwishFusionWithSigmoid : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SwishFusionWithSigmoid", "0"); + OPENVINO_MATCHER_PASS_RTTI("SwishFusionWithSigmoid"); SwishFusionWithSigmoid(); }; @@ -38,7 +38,7 @@ class ov::pass::SwishFusionWithSigmoid : public ov::pass::MatcherPass { */ class ov::pass::SwishFusionWithSigmoidWithBeta : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SwishFusionWithSigmoidWithBeta", "0"); + OPENVINO_MATCHER_PASS_RTTI("SwishFusionWithSigmoidWithBeta"); SwishFusionWithSigmoidWithBeta(); }; @@ -48,7 +48,7 @@ class ov::pass::SwishFusionWithSigmoidWithBeta : public ov::pass::MatcherPass { */ class ov::pass::SwishFusionWithBeta : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SwishFusionWithBeta", "0"); + OPENVINO_MATCHER_PASS_RTTI("SwishFusionWithBeta"); SwishFusionWithBeta(); }; @@ -58,7 +58,7 @@ class ov::pass::SwishFusionWithBeta : public ov::pass::MatcherPass { */ class ov::pass::SwishFusionWithoutBeta : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("SwishFusionWithoutBeta", "0"); + OPENVINO_MATCHER_PASS_RTTI("SwishFusionWithoutBeta"); SwishFusionWithoutBeta(); }; @@ -68,7 +68,7 @@ class ov::pass::SwishFusionWithoutBeta : public ov::pass::MatcherPass { */ class ov::pass::SwishFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("SwishFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("SwishFusion"); SwishFusion() { add_matcher(); add_matcher(); diff --git 
a/src/common/transformations/include/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp b/src/common/transformations/include/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp index c1299872c4b3a5..b250ce1d3c3866 100644 --- a/src/common/transformations/include/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp @@ -26,6 +26,6 @@ class TRANSFORMATIONS_API TransposeReshapeEliminationForMatmul; */ class ov::pass::TransposeReshapeEliminationForMatmul : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("TransposeReshapeEliminationForMatmul", "0"); + OPENVINO_MATCHER_PASS_RTTI("TransposeReshapeEliminationForMatmul"); TransposeReshapeEliminationForMatmul(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/transpose_sinking.hpp b/src/common/transformations/include/transformations/common_optimizations/transpose_sinking.hpp index 8189535fe260ae..d9f5c257f6dda1 100644 --- a/src/common/transformations/include/transformations/common_optimizations/transpose_sinking.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/transpose_sinking.hpp @@ -29,7 +29,7 @@ class TRANSFORMATIONS_API TransposeFuse; */ class ov::pass::TransposeReduction : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("TransposeReduction", "0"); + OPENVINO_MATCHER_PASS_RTTI("TransposeReduction"); TransposeReduction(); }; @@ -40,7 +40,7 @@ class ov::pass::TransposeReduction : public ov::pass::MatcherPass { */ class ov::pass::TransposeFQReduction : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("TransposeFQReduction", "0"); + OPENVINO_MATCHER_PASS_RTTI("TransposeFQReduction"); TransposeFQReduction(); }; @@ -50,7 +50,7 @@ class ov::pass::TransposeFQReduction : public ov::pass::MatcherPass { */ class ov::pass::TransposeConvert : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("TransposeConvert", "0"); + OPENVINO_MATCHER_PASS_RTTI("TransposeConvert"); TransposeConvert(); }; @@ -60,7 +60,7 @@ class ov::pass::TransposeConvert : public ov::pass::MatcherPass { */ class ov::pass::TransposeEltwise : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("TransposeEltwise", "0"); + OPENVINO_MATCHER_PASS_RTTI("TransposeEltwise"); TransposeEltwise(); }; @@ -71,7 +71,7 @@ class ov::pass::TransposeEltwise : public ov::pass::MatcherPass { */ class ov::pass::TransposeFuse : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("TransposeFuse", "0"); + OPENVINO_MATCHER_PASS_RTTI("TransposeFuse"); TransposeFuse(); }; @@ -81,7 +81,7 @@ class ov::pass::TransposeFuse : public ov::pass::MatcherPass { */ class ov::pass::TransposeSinking : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("TransposeSinking", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("TransposeSinking"); TransposeSinking() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/common_optimizations/transpose_to_reshape.hpp b/src/common/transformations/include/transformations/common_optimizations/transpose_to_reshape.hpp index aba6154bb0b58c..3a99d47858ec6f 100644 --- a/src/common/transformations/include/transformations/common_optimizations/transpose_to_reshape.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/transpose_to_reshape.hpp @@ -24,6 +24,6 @@ class TRANSFORMATIONS_API TransposeToReshape; */ class 
ov::pass::TransposeToReshape : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("TransposeToReshape", "0"); + OPENVINO_MATCHER_PASS_RTTI("TransposeToReshape"); TransposeToReshape(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp b/src/common/transformations/include/transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp index aa34c7859d068a..1b77d8f519d391 100644 --- a/src/common/transformations/include/transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp @@ -28,6 +28,6 @@ class TRANSFORMATIONS_API WeightsDequantizeToFakeQuantize; */ class ov::pass::WeightsDequantizeToFakeQuantize : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("WeightsDequantizeToFakeQuantize", "0"); + OPENVINO_MATCHER_PASS_RTTI("WeightsDequantizeToFakeQuantize"); WeightsDequantizeToFakeQuantize(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/wrap_interpolate_into_transposes.hpp b/src/common/transformations/include/transformations/common_optimizations/wrap_interpolate_into_transposes.hpp index 7b35498d5dde64..e0ef8b68bdaf04 100644 --- a/src/common/transformations/include/transformations/common_optimizations/wrap_interpolate_into_transposes.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/wrap_interpolate_into_transposes.hpp @@ -35,6 +35,6 @@ class TRANSFORMATIONS_API WrapInterpolateIntoTransposes; */ class ov::pass::WrapInterpolateIntoTransposes : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("WrapInterpolateIntoTransposes", "0"); + OPENVINO_MATCHER_PASS_RTTI("WrapInterpolateIntoTransposes"); WrapInterpolateIntoTransposes(); }; diff --git a/src/common/transformations/include/transformations/flush_fp32_subnormals_to_zero.hpp b/src/common/transformations/include/transformations/flush_fp32_subnormals_to_zero.hpp index 71c71b0614f29d..b9522d4a4273dd 100644 --- a/src/common/transformations/include/transformations/flush_fp32_subnormals_to_zero.hpp +++ b/src/common/transformations/include/transformations/flush_fp32_subnormals_to_zero.hpp @@ -25,6 +25,6 @@ class TRANSFORMATIONS_API FlushFP32SubnormalsToZero; */ class ov::pass::FlushFP32SubnormalsToZero : public MatcherPass { public: - OPENVINO_RTTI("FlushFP32SubnormalsToZero", "0"); + OPENVINO_MATCHER_PASS_RTTI("FlushFP32SubnormalsToZero"); FlushFP32SubnormalsToZero(); }; diff --git a/src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp b/src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp index 4f003dbc09e671..99852d0dc0df3f 100644 --- a/src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp +++ b/src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp @@ -26,7 +26,7 @@ class TRANSFORMATIONS_API MarkCompressedFloatConstants; */ class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0"); + OPENVINO_MATCHER_PASS_RTTI("EnableDecompressionConvertConstantFolding"); EnableDecompressionConvertConstantFolding(); }; @@ -36,7 +36,7 @@ class ov::pass::EnableDecompressionConvertConstantFolding : public 
ov::pass::Mat */ class ov::pass::DisableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("DisableDecompressionConvertConstantFolding", "0"); + OPENVINO_MATCHER_PASS_RTTI("DisableDecompressionConvertConstantFolding"); DisableDecompressionConvertConstantFolding(); }; @@ -46,7 +46,7 @@ class ov::pass::DisableDecompressionConvertConstantFolding : public ov::pass::Ma */ class ov::pass::KeepConstAndDecompression : public MatcherPass { public: - OPENVINO_RTTI("KeepConstAndDecompression", "0"); + OPENVINO_MATCHER_PASS_RTTI("KeepConstAndDecompression"); KeepConstAndDecompression(); }; @@ -56,7 +56,7 @@ class ov::pass::KeepConstAndDecompression : public MatcherPass { */ class ov::pass::KeepConstantsPrecisionAndAddConverts : public MatcherPass { public: - OPENVINO_RTTI("KeepConstantsPrecisionAndAddConverts", "0"); + OPENVINO_MATCHER_PASS_RTTI("KeepConstantsPrecisionAndAddConverts"); KeepConstantsPrecisionAndAddConverts(); }; @@ -69,6 +69,6 @@ class ov::pass::KeepConstantsPrecisionAndAddConverts : public MatcherPass { */ class ov::pass::MarkCompressedFloatConstants : public MatcherPass { public: - OPENVINO_RTTI("KeepFWPrecisionFor16BitFloatConstants", "0"); + OPENVINO_MATCHER_PASS_RTTI("MarkCompressedFloatConstants"); MarkCompressedFloatConstants(); }; diff --git a/src/common/transformations/include/transformations/fp16_compression/mark_floatpoint_range.hpp b/src/common/transformations/include/transformations/fp16_compression/mark_floatpoint_range.hpp index a61bd270c584ec..c1f948299c4321 100644 --- a/src/common/transformations/include/transformations/fp16_compression/mark_floatpoint_range.hpp +++ b/src/common/transformations/include/transformations/fp16_compression/mark_floatpoint_range.hpp @@ -18,7 +18,7 @@ namespace pass { */ class TRANSFORMATIONS_API MarkFloatingPointRange : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MarkFloatingPointRange", "0"); + OPENVINO_MATCHER_PASS_RTTI("MarkFloatingPointRange"); MarkFloatingPointRange(); }; @@ -27,4 +27,4 @@ OPENVINO_API bool is_range_path(const std::shared_ptr& node); OPENVINO_API void erase_range_path(const std::shared_ptr& node); } // namespace pass -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp index 6cbd8d990ac73e..22f1eb753a28ad 100644 --- a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp +++ b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp @@ -40,7 +40,7 @@ namespace pass { */ class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MarkDequantization", "0"); + OPENVINO_MATCHER_PASS_RTTI("MarkDequantization"); explicit MarkDequantization(const element::TypeVector& precisions, bool fold_subtract_const = false, bool fold_multiply_const = true); @@ -70,7 +70,7 @@ class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { */ class TRANSFORMATIONS_API KeepConstsPrecision : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("KeepConstsPrecision", "0"); + OPENVINO_MATCHER_PASS_RTTI("KeepConstsPrecision"); explicit KeepConstsPrecision(const element::TypeVector& precisions, bool fold_subtract_const = false, bool fold_multiply_const = true); diff --git 
a/src/common/transformations/include/transformations/op_conversions/bidirectional_sequences_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/bidirectional_sequences_decomposition.hpp index f74f08a9b8c061..a49c1655537844 100644 --- a/src/common/transformations/include/transformations/op_conversions/bidirectional_sequences_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/bidirectional_sequences_decomposition.hpp @@ -67,7 +67,7 @@ class ov::pass::BidirectionalRNNSequenceDecomposition : public ov::pass::Matcher class ov::pass::BidirectionalSequenceDecomposition : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("BidirectionalSequenceDecomposition", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("BidirectionalSequenceDecomposition"); BidirectionalSequenceDecomposition() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_bitwise_to_logical_bool.hpp b/src/common/transformations/include/transformations/op_conversions/convert_bitwise_to_logical_bool.hpp index a5e130e2389af2..64821ce658eb66 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_bitwise_to_logical_bool.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_bitwise_to_logical_bool.hpp @@ -43,7 +43,7 @@ class ov::pass::ConvertBitwiseXorToLogicalXor : public ov::pass::MatcherPass { */ class ConvertBitwiseToLogical : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("ConvertBitwiseToLogical", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("ConvertBitwiseToLogical"); ConvertBitwiseToLogical() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp b/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp index 36d2b052243382..32a2f7a3ace512 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp @@ -61,7 +61,7 @@ class ov::pass::ConvertReduceSumToPooling : public ConvertReduceBase { class ov::pass::ConvertReduceToPooling : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("ConvertReduceToPooling", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("ConvertReduceToPooling"); ConvertReduceToPooling() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_reshape.hpp b/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_reshape.hpp index f020e768be2feb..9eeb3e5c0f8da6 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_reshape.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_reshape.hpp @@ -84,7 +84,7 @@ class ov::pass::ConvertReduceLogicalOrToReshape : public CvtReduceBase { class ov::pass::ConvertReduceToReshape : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("ConvertReduceToReshape", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("ConvertReduceToReshape"); // Handling reduce if it can be converted to reshape (check input/output tensor) ConvertReduceToReshape() { // Redundant reduce based on its mode diff --git a/src/common/transformations/include/transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp 
b/src/common/transformations/include/transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp index 46a7e8ff0317e9..e108f4a50ce1f2 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp @@ -59,6 +59,6 @@ class ov::pass::ConvertLSTMSequenceToTensorIterator : public ov::pass::MatcherPa class ov::pass::ConvertSequenceToTensorIterator : public GraphRewrite { public: - OPENVINO_RTTI("ConvertSequenceToTensorIterator", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("ConvertSequenceToTensorIterator"); ConvertSequenceToTensorIterator(); }; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_ti_to_sequences.hpp b/src/common/transformations/include/transformations/op_conversions/convert_ti_to_sequences.hpp index fb53cc81743ec4..f4bd61573e3ac2 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_ti_to_sequences.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_ti_to_sequences.hpp @@ -66,7 +66,7 @@ class ov::pass::ConvertTensorIteratorToGRUSequence : public ov::pass::MatcherPas class ov::pass::ConvertTensorIteratorToSequence : public GraphRewrite { public: - OPENVINO_RTTI("ConvertTensorIteratorToSequence", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("ConvertTensorIteratorToSequence"); ConvertTensorIteratorToSequence(); }; @@ -88,7 +88,7 @@ class ov::pass::ConvertLoopWithScatterUpdateToLSTMSequence : public ov::pass::Ma */ class ov::pass::ConvertLoopToLSTMSequence : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("ConvertLoopToLSTMSequence", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("ConvertLoopToLSTMSequence"); ConvertLoopToLSTMSequence() { add_matcher(); add_matcher(); diff --git a/src/common/transformations/include/transformations/op_conversions/fake_convert_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/fake_convert_decomposition.hpp new file mode 100644 index 00000000000000..e149152b2bcf6d --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/fake_convert_decomposition.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API FakeConvertDecomposition; + +} // namespace pass +} // namespace ov + +/** + * @ingroup ov_transformation_common_api + * @brief FakeConvertDecomposition transformation decomposes FakeConvert layer. 
+ * f8: f8e4m3, f8e5m2
+ * downconvert: f32->f8, f16->f8, bf16->f8
+ * upconvert: f8->f32, f8->f16, f8->bf16
+ * output = (upconvert(downconvert(input * scale - shift)) + shift) / scale
+ *
+ */
+
+class ov::pass::FakeConvertDecomposition : public ov::pass::MatcherPass {
+public:
+    OPENVINO_MATCHER_PASS_RTTI("FakeConvertDecomposition");
+    FakeConvertDecomposition();
+};
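// Editor's sketch (not part of the patch): the formula above written out as scalar
// C++ so the decomposition order is explicit. The helper names downconvert_f8 and
// upconvert_f8 are assumptions standing in for the real f8e4m3/f8e5m2 casts, which
// would quantize the value; here they are identity placeholders.
#include <cstdio>

static float downconvert_f8(float x) { return x; }  // f32 -> f8 (placeholder)
static float upconvert_f8(float x) { return x; }    // f8 -> f32 (placeholder)

// output = (upconvert(downconvert(input * scale - shift)) + shift) / scale
static float fake_convert_ref(float input, float scale, float shift) {
    const float down = downconvert_f8(input * scale - shift);
    return (upconvert_f8(down) + shift) / scale;
}

int main() {
    // With identity down/up conversion the round trip reproduces the input.
    std::printf("%f\n", fake_convert_ref(0.5f, 2.0f, 0.25f));  // prints 0.500000
}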
diff --git a/src/common/transformations/include/transformations/sdpa_to_paged_attention/position_ids_replacer.hpp b/src/common/transformations/include/transformations/sdpa_to_paged_attention/position_ids_replacer.hpp
index 5ee79ec787a9bc..825ce8acbd7998 100644
--- a/src/common/transformations/include/transformations/sdpa_to_paged_attention/position_ids_replacer.hpp
+++ b/src/common/transformations/include/transformations/sdpa_to_paged_attention/position_ids_replacer.hpp
@@ -15,12 +15,32 @@ namespace ov {
 namespace pass {
 
 class TRANSFORMATIONS_API PositionIDsReplacer;
+class TRANSFORMATIONS_API PositionIDsReplacerQwen;
 
 }  // namespace pass
 }  // namespace ov
 
 class ov::pass::PositionIDsReplacer : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("PositionIDsReplacer", "0");
+    OPENVINO_MATCHER_PASS_RTTI("PositionIDsReplacer");
     explicit PositionIDsReplacer(const Output<Node>& position_ids);
-};
\ No newline at end of file
+};
+
+/**
+ * @brief The Qwen model expects data to be processed in order; its "position ids" input is
+ * detached and not used explicitly in the model. Instead, the model derives the position ids
+ * implicitly from the past KV-cache size.
+ *
+ * To run this model in continuous-batching mode, we need to apply position_ids and
+ * use the corresponding rotary_emb_cos/rotary_emb_sin values.
+ * For this, we replace
+ *      rotary_emb_cos/rotary_emb_sin -> Slice -> Slice
+ * with
+ *      rotary_emb_cos/rotary_emb_sin -> Gather(by position_ids),
+ * which applies RoPE to each token independently of its order in the input tensor.
+ */
+class ov::pass::PositionIDsReplacerQwen : public ov::pass::MatcherPass {
+public:
+    OPENVINO_MATCHER_PASS_RTTI("PositionIDsReplacerQwen");
+    explicit PositionIDsReplacerQwen(const Output<Node>& position_ids);
+};
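// Editor's sketch (not part of the patch): why Gather(by position_ids) can stand in
// for Slice -> Slice on the rotary tables. Assumes a precomputed per-position cos
// table; all names here are illustrative, not the pass's actual variables.
#include <cstdio>
#include <vector>

int main() {
    // rotary_emb_cos stand-in: one row per absolute position (4 positions, head dim 2).
    std::vector<std::vector<float>> cos_table = {
        {1.0f, 1.0f}, {0.9f, 0.99f}, {0.6f, 0.96f}, {0.1f, 0.91f}};

    // Slice -> Slice can only keep a contiguous tail of the table, which forces tokens
    // to arrive in positional order. Gather looks rows up explicitly, so one batch may
    // mix tokens sitting at different offsets of their sequences:
    std::vector<int> position_ids = {3, 1};
    for (int id : position_ids) {
        std::printf("pos %d -> cos {%g, %g}\n", id, cos_table[id][0], cos_table[id][1]);
    }
}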
diff --git a/src/common/transformations/include/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp b/src/common/transformations/include/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp
index fd4e22c69262ae..d1cc5d5126cd67 100644
--- a/src/common/transformations/include/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp
+++ b/src/common/transformations/include/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp
@@ -4,7 +4,6 @@
 
 #pragma once
 
-#include "openvino/cc/pass/itt.hpp"
 #include "openvino/op/shape_of.hpp"
 #include "openvino/op/subtract.hpp"
 #include "openvino/pass/matcher_pass.hpp"
@@ -22,6 +21,8 @@ class TRANSFORMATIONS_API PrevSequenceLengthPattern;
 
 class ov::pass::PrevSequenceLengthPattern : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("PrevSequenceLengthPattern", "0");
-    explicit PrevSequenceLengthPattern(std::shared_ptr prev_max_seq_len, std::shared_ptr batch_dim);
-};
\ No newline at end of file
+    OPENVINO_MATCHER_PASS_RTTI("PrevSequenceLengthPattern");
+    explicit PrevSequenceLengthPattern(const std::shared_ptr<ov::Node>& unsqueezed_input_ids,
+                                       const std::shared_ptr<ov::Node>& max_context_len,
+                                       const std::shared_ptr<ov::Node>& position_ids);
+};
diff --git a/src/common/transformations/include/transformations/sdpa_to_paged_attention/state_management_pattern.hpp b/src/common/transformations/include/transformations/sdpa_to_paged_attention/state_management_pattern.hpp
index feab06ccc0cd5d..79b4f444cfa791 100644
--- a/src/common/transformations/include/transformations/sdpa_to_paged_attention/state_management_pattern.hpp
+++ b/src/common/transformations/include/transformations/sdpa_to_paged_attention/state_management_pattern.hpp
@@ -17,7 +17,7 @@ class TRANSFORMATIONS_API StateManagementPattern;
 
 class ov::pass::StateManagementPattern : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("StateManagementPattern", "0");
+    OPENVINO_MATCHER_PASS_RTTI("StateManagementPattern");
     StateManagementPattern(ParameterVector& kv_parameters,
                           ParameterVector& model_remaining_params,
                           const std::shared_ptr<ov::op::v0::Constant>& sliding_window,
@@ -28,4 +28,4 @@ class ov::pass::StateManagementPattern : public ov::pass::MatcherPass {
                           ResultVector& score_results,
                           bool use_block_indices,
                           bool use_score_outputs);
-};
\ No newline at end of file
+};
diff --git a/src/common/transformations/include/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp b/src/common/transformations/include/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp
index c6b319a389ecaa..2456161ea80a78 100644
--- a/src/common/transformations/include/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp
+++ b/src/common/transformations/include/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp
@@ -15,12 +15,32 @@ namespace ov {
 namespace pass {
 
 class TRANSFORMATIONS_API TotalSequenceLengthPattern;
+class TRANSFORMATIONS_API TotalSequenceLengthPatternQwen;
 
 }  // namespace pass
 }  // namespace ov
 
 class ov::pass::TotalSequenceLengthPattern : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("TotalSequenceLengthPattern", "0");
+    OPENVINO_MATCHER_PASS_RTTI("TotalSequenceLengthPattern");
     explicit TotalSequenceLengthPattern(const std::shared_ptr<ov::op::v0::Parameter>& max_context_len);
 };
+
+/**
+ * @brief The Qwen model exposes a specific pattern for detecting the TotalSequenceLen place.
+ *
+ * Common pattern: Add(PrevSeqLen, CurrentSeqLen)
+ *
+ * CurrentSeqLen is presented in this form:
+ *      CurrentSeqLen: Parameter(name: input_ids) -> ShapeOf -> Gather
+ *
+ * Before this transformation is applied, the PrevSeqLen place has already been detected by
+ * PrevSequenceLengthPattern and replaced with the following subgraph:
+ *      PrevSeqLen: Subtract(in: Parameter(name: max_context_len), in: CurrentSeqLen)
+ */
+class ov::pass::TotalSequenceLengthPatternQwen : public ov::pass::MatcherPass {
+public:
+    OPENVINO_MATCHER_PASS_RTTI("TotalSequenceLengthPatternQwen");
+    explicit TotalSequenceLengthPatternQwen(const std::shared_ptr<ov::op::v0::Parameter>& max_context_len);
+};
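// Editor's sketch (not part of the patch): scalar form of the relationship the
// pattern above encodes. Variable names are illustrative; in the graph these are
// the max_context_len Parameter, the ShapeOf->Gather on input_ids, the Subtract
// inserted by PrevSequenceLengthPattern, and the Add matched by this pass.
#include <cassert>

int main() {
    int max_context_len = 12;  // Parameter(name: max_context_len)
    int current_seq_len = 4;   // Parameter(name: input_ids) -> ShapeOf -> Gather
    int prev_seq_len = max_context_len - current_seq_len;  // Subtract subgraph
    int total_seq_len = prev_seq_len + current_seq_len;    // Add(PrevSeqLen, CurrentSeqLen)
    assert(total_seq_len == max_context_len);
    return 0;
}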
diff --git a/src/common/transformations/include/transformations/smart_reshape/broadcast_const_range_replacement.hpp b/src/common/transformations/include/transformations/smart_reshape/broadcast_const_range_replacement.hpp
index 04f7de9a336414..12cfe17a115592 100644
--- a/src/common/transformations/include/transformations/smart_reshape/broadcast_const_range_replacement.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/broadcast_const_range_replacement.hpp
@@ -23,6 +23,6 @@ class TRANSFORMATIONS_API BroadcastConstRangeReplacement;
 
 class ov::pass::BroadcastConstRangeReplacement : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("BroadcastConstRangeReplacement", "0");
+    OPENVINO_MATCHER_PASS_RTTI("BroadcastConstRangeReplacement");
     BroadcastConstRangeReplacement();
 };
diff --git a/src/common/transformations/include/transformations/smart_reshape/matmul_sr.hpp b/src/common/transformations/include/transformations/smart_reshape/matmul_sr.hpp
index 4e21e767d1bce6..cec7ce6bcd074e 100644
--- a/src/common/transformations/include/transformations/smart_reshape/matmul_sr.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/matmul_sr.hpp
@@ -30,16 +30,16 @@ class TRANSFORMATIONS_API TransposeMatMul;
 
 class ov::pass::ReshapeAMatMul : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ReshapeAMatMul", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ReshapeAMatMul");
     ReshapeAMatMul();
 };
 
 class ov::pass::ReshapeBMatMul : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ReshapeBMatMul", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ReshapeBMatMul");
     ReshapeBMatMul();
 };
 
 class ov::pass::TransposeMatMul : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("TransposeMatMul", "0");
+    OPENVINO_MATCHER_PASS_RTTI("TransposeMatMul");
     TransposeMatMul();
 };
diff --git a/src/common/transformations/include/transformations/smart_reshape/proposal_scales_stridedslice.hpp b/src/common/transformations/include/transformations/smart_reshape/proposal_scales_stridedslice.hpp
index d2eaf125e0dd5c..c8d756252509e1 100644
--- a/src/common/transformations/include/transformations/smart_reshape/proposal_scales_stridedslice.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/proposal_scales_stridedslice.hpp
@@ -36,12 +36,12 @@ class TRANSFORMATIONS_API Proposal4Scales;
 
 class ov::pass::Proposal1Scales : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("Proposal1Scales", "0");
+    OPENVINO_MATCHER_PASS_RTTI("Proposal1Scales");
     Proposal1Scales();
 };
 
 class ov::pass::Proposal4Scales : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("Proposal4Scales", "0");
+    OPENVINO_MATCHER_PASS_RTTI("Proposal4Scales");
     Proposal4Scales();
 };
diff --git a/src/common/transformations/include/transformations/smart_reshape/reshape_sinking.hpp b/src/common/transformations/include/transformations/smart_reshape/reshape_sinking.hpp
index aeaf46ccde1c2b..dd64980a0d155a 100644
--- a/src/common/transformations/include/transformations/smart_reshape/reshape_sinking.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/reshape_sinking.hpp
@@ -28,6 +28,6 @@ class TRANSFORMATIONS_API ReshapeSinkingMatMul;
 
 class ov::pass::ReshapeSinkingMatMul : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ReshapeSinkingMatMul", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ReshapeSinkingMatMul");
     ReshapeSinkingMatMul();
 };
diff --git a/src/common/transformations/include/transformations/smart_reshape/reshape_to_1D.hpp b/src/common/transformations/include/transformations/smart_reshape/reshape_to_1D.hpp
index a973038bb30900..cab320f543f382 100644
--- a/src/common/transformations/include/transformations/smart_reshape/reshape_to_1D.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/reshape_to_1D.hpp
@@ -25,6 +25,6 @@ class TRANSFORMATIONS_API ReshapeTo1D;
 
 class ov::pass::ReshapeTo1D : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ReshapeTo1D", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ReshapeTo1D");
     ReshapeTo1D();
 };
diff --git a/src/common/transformations/include/transformations/smart_reshape/shape_of_const_folding.hpp b/src/common/transformations/include/transformations/smart_reshape/shape_of_const_folding.hpp
index d2bc029cce4682..fdb620bf588e72 100644
--- a/src/common/transformations/include/transformations/smart_reshape/shape_of_const_folding.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/shape_of_const_folding.hpp
@@ -16,7 +16,7 @@ namespace pass {
  */
 class TRANSFORMATIONS_API ShapeOfConstFolding : public MatcherPass {
 public:
-    OPENVINO_RTTI("ShapeOfConstFolding", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ShapeOfConstFolding");
     ShapeOfConstFolding();
 };
diff --git a/src/common/transformations/include/transformations/smart_reshape/strided_slice_squeeze.hpp b/src/common/transformations/include/transformations/smart_reshape/strided_slice_squeeze.hpp
index f34b6d25a27e49..cafac7e77857fb 100644
--- a/src/common/transformations/include/transformations/smart_reshape/strided_slice_squeeze.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/strided_slice_squeeze.hpp
@@ -27,7 +27,7 @@ class TRANSFORMATIONS_API SqueezeStridedSlice;
 
 class ov::pass::StridedSliceSqueeze : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("StridedSliceSqueeze", "0");
+    OPENVINO_MATCHER_PASS_RTTI("StridedSliceSqueeze");
     StridedSliceSqueeze();
 };
 
@@ -39,6 +39,6 @@ class ov::pass::StridedSliceSqueeze : public ov::pass::MatcherPass {
 
 class ov::pass::SqueezeStridedSlice : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("SqueezeStridedSlice", "0");
+    OPENVINO_MATCHER_PASS_RTTI("SqueezeStridedSlice");
     SqueezeStridedSlice();
 };
diff --git a/src/common/transformations/include/transformations/symbolic_transformations/chained_maximum.hpp b/src/common/transformations/include/transformations/symbolic_transformations/chained_maximum.hpp
index c7cb03afd5ade4..20a43475233050 100644
--- a/src/common/transformations/include/transformations/symbolic_transformations/chained_maximum.hpp
+++ b/src/common/transformations/include/transformations/symbolic_transformations/chained_maximum.hpp
@@ -21,6 +21,6 @@ class TRANSFORMATIONS_API ChainedMaximumOptimization;
 
 class ov::pass::ChainedMaximumOptimization : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ChainedMaximumOptimization", "0");
+
OPENVINO_MATCHER_PASS_RTTI("ChainedMaximumOptimization"); ChainedMaximumOptimization(); -}; \ No newline at end of file +}; diff --git a/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp b/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp index fa1c844faa7129..b1586741a05833 100644 --- a/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp +++ b/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp @@ -62,7 +62,7 @@ class TRANSFORMATIONS_API DeReshapeFullyConnected; */ class ov::pass::DeReshapeMatMul : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("DeReshapeMatMul", "0"); + OPENVINO_MATCHER_PASS_RTTI("DeReshapeMatMul"); DeReshapeMatMul(); }; @@ -87,6 +87,6 @@ class ov::pass::DeReshapeMatMul : public ov::pass::MatcherPass { */ class ov::pass::DeReshapeFullyConnected : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("DeReshapeFullyConnected", "0"); + OPENVINO_MATCHER_PASS_RTTI("DeReshapeFullyConnected"); DeReshapeFullyConnected(); }; diff --git a/src/common/transformations/include/transformations/symbolic_transformations/nop_broadcast.hpp b/src/common/transformations/include/transformations/symbolic_transformations/nop_broadcast.hpp index 5a12b5735ce428..524fef52846e5c 100644 --- a/src/common/transformations/include/transformations/symbolic_transformations/nop_broadcast.hpp +++ b/src/common/transformations/include/transformations/symbolic_transformations/nop_broadcast.hpp @@ -20,6 +20,6 @@ class TRANSFORMATIONS_API NopBroadcast; */ class ov::pass::NopBroadcast : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("NopBroadcast", "0"); + OPENVINO_MATCHER_PASS_RTTI("NopBroadcast"); NopBroadcast(); -}; \ No newline at end of file +}; diff --git a/src/common/transformations/include/transformations/symbolic_transformations/reshape_optimizations.hpp b/src/common/transformations/include/transformations/symbolic_transformations/reshape_optimizations.hpp index 5d84d83bad2de5..f23cfd580cbccf 100644 --- a/src/common/transformations/include/transformations/symbolic_transformations/reshape_optimizations.hpp +++ b/src/common/transformations/include/transformations/symbolic_transformations/reshape_optimizations.hpp @@ -52,6 +52,6 @@ class TRANSFORMATIONS_API ReshapeOptimizations; */ class ov::pass::ReshapeOptimizations : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ReshapeOptimizations", "0"); + OPENVINO_MATCHER_PASS_RTTI("ReshapeOptimizations"); ReshapeOptimizations(); }; diff --git a/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp b/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp index c6a99c90122544..6197ad4c246f6a 100644 --- a/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp +++ b/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp @@ -58,6 +58,6 @@ class ov::pass::SymbolicPropagation : public ov::pass::ModelPass { */ class ov::pass::LabelResolvingThroughSelect : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("LabelResolvingThroughSelect", "0"); + OPENVINO_MATCHER_PASS_RTTI("LabelResolvingThroughSelect"); LabelResolvingThroughSelect(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_base.hpp 
b/src/common/transformations/include/transformations/transpose_sinking/ts_base.hpp index 013799e854df8f..d16bf401576b96 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_base.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_base.hpp @@ -27,7 +27,7 @@ class TRANSFORMATIONS_API TSForwardBase; */ class ov::pass::transpose_sinking::TSForwardBase : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSForwardBase", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSForwardBase"); TSForwardBase() = default; template diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_binary.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_binary.hpp index 9534380d51253e..b8eca55dcb9685 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_binary.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_binary.hpp @@ -37,6 +37,6 @@ class ov::pass::transpose_sinking::TSBinaryForward : public ov::pass::transpose_ */ class ov::pass::transpose_sinking::TSBinaryBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSBinaryBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSBinaryBackward"); TSBinaryBackward(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_concat.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_concat.hpp index cc0ccc2c194dbf..5b6477da94a80d 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_concat.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_concat.hpp @@ -37,6 +37,6 @@ class ov::pass::transpose_sinking::TSConcatForward : public ov::pass::transpose_ */ class ov::pass::transpose_sinking::TSConcatBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSConcatBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSConcatBackward"); TSConcatBackward(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_cumsum.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_cumsum.hpp index 185bea0105ec4d..741c56d5be0de7 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_cumsum.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_cumsum.hpp @@ -35,6 +35,6 @@ class ov::pass::transpose_sinking::TSCumSumForward : public ov::pass::transpose_ */ class ov::pass::transpose_sinking::TSCumSumBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSBinaryBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSCumSumBackward"); TSCumSumBackward(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_data_movement.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_data_movement.hpp index 9775e57d61146b..e1a4f34a109eec 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_data_movement.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_data_movement.hpp @@ -39,6 +39,6 @@ class ov::pass::transpose_sinking::TSDataMovementForward : public ov::pass::tran */ class ov::pass::transpose_sinking::TSDataMovementBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSDataMovementBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSDataMovementBackward"); 
TSDataMovementBackward(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_fuse.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_fuse.hpp index 974e7accc4d808..6f5a654e70e81c 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_fuse.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_fuse.hpp @@ -24,6 +24,6 @@ class TRANSFORMATIONS_API TSFuse; */ class ov::pass::transpose_sinking::TSFuse : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("TSFuse", "0"); + OPENVINO_MATCHER_PASS_RTTI("TSFuse"); TSFuse(); -}; \ No newline at end of file +}; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_gather.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_gather.hpp index 5bc7cea340cb72..891b8bd85c2ed4 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_gather.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_gather.hpp @@ -37,6 +37,6 @@ class ov::pass::transpose_sinking::TSGatherForward : public ov::pass::transpose_ */ class ov::pass::transpose_sinking::TSGatherBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSGatherBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSGatherBackward"); TSGatherBackward(); -}; \ No newline at end of file +}; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_general.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_general.hpp index 09d4d5819322a9..b39a25b9db0872 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_general.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_general.hpp @@ -29,7 +29,7 @@ using TransposeSinkingGeneral = ov::pass::transpose_sinking::TSGeneral; */ class ov::pass::transpose_sinking::TSGeneralForward : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("TSGeneralForward", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("TSGeneralForward"); TSGeneralForward(); }; @@ -40,7 +40,7 @@ class ov::pass::transpose_sinking::TSGeneralForward : public ov::pass::GraphRewr */ class ov::pass::transpose_sinking::TSGeneralBackward : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("TSGeneralBackward", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("TSGeneralBackward"); TSGeneralBackward(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_interpolate.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_interpolate.hpp index dfdd062e1f7ce3..90ae417aca9fc6 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_interpolate.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_interpolate.hpp @@ -37,6 +37,6 @@ class ov::pass::transpose_sinking::TSInterpolateForward : public ov::pass::trans */ class ov::pass::transpose_sinking::TSInterpolateBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSInterpolateBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSInterpolateBackward"); TSInterpolateBackward(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_reduction.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_reduction.hpp index a983519a3012d6..d2992bc8a4abd8 100644 --- 
a/src/common/transformations/include/transformations/transpose_sinking/ts_reduction.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_reduction.hpp @@ -37,6 +37,6 @@ class ov::pass::transpose_sinking::TSReductionForward : public ov::pass::transpo */ class ov::pass::transpose_sinking::TSReductionBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSReductionBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSReductionBackward"); TSReductionBackward(); -}; \ No newline at end of file +}; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_reset_no_sinking_attribute.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_reset_no_sinking_attribute.hpp index 240c6e8342c069..3aa2a770390e35 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_reset_no_sinking_attribute.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_reset_no_sinking_attribute.hpp @@ -24,6 +24,6 @@ class TRANSFORMATIONS_API TSResetNoSinkingAttribute; */ class ov::pass::transpose_sinking::TSResetNoSinkingAttribute : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSResetNoSinkingAttribute", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSResetNoSinkingAttribute"); TSResetNoSinkingAttribute(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_slice.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_slice.hpp index 5e10a7f0e8a930..12ccc614861140 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_slice.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_slice.hpp @@ -27,6 +27,6 @@ class ov::pass::transpose_sinking::TSSliceForward : public ov::pass::transpose_s class ov::pass::transpose_sinking::TSSliceBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSSliceBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSSliceBackward"); TSSliceBackward(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_split.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_split.hpp index cb24aa5273906f..b21bada67ad368 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_split.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_split.hpp @@ -38,6 +38,6 @@ class ov::pass::transpose_sinking::TSSplitForward : public ov::pass::transpose_s */ class ov::pass::transpose_sinking::TSSplitBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSSplitBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSSplitBackward"); TSSplitBackward(); }; diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_squeeze.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_squeeze.hpp index 752e040ab52cab..0d86d0a4c29242 100644 --- a/src/common/transformations/include/transformations/transpose_sinking/ts_squeeze.hpp +++ b/src/common/transformations/include/transformations/transpose_sinking/ts_squeeze.hpp @@ -37,6 +37,6 @@ class ov::pass::transpose_sinking::TSSqueezeForward : public ov::pass::transpose */ class ov::pass::transpose_sinking::TSSqueezeBackward : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ov::pass::TSSqueezeBackward", "0"); + OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSSqueezeBackward"); 
    TSSqueezeBackward();
 };
diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_tile.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_tile.hpp
index 9bb15894d70a81..ffd14ce9a38d84 100644
--- a/src/common/transformations/include/transformations/transpose_sinking/ts_tile.hpp
+++ b/src/common/transformations/include/transformations/transpose_sinking/ts_tile.hpp
@@ -36,6 +36,6 @@ class ov::pass::transpose_sinking::TSTileForward : public ov::pass::transpose_si
  */
 class ov::pass::transpose_sinking::TSTileBackward : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ov::pass::TSBinaryBackward", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSTileBackward");
     TSTileBackward();
 };
diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_unary.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_unary.hpp
index c8148e912b30c0..1d745ada561224 100644
--- a/src/common/transformations/include/transformations/transpose_sinking/ts_unary.hpp
+++ b/src/common/transformations/include/transformations/transpose_sinking/ts_unary.hpp
@@ -37,6 +37,6 @@ class ov::pass::transpose_sinking::TSUnaryForward : public ov::pass::transpose_s
  */
 class ov::pass::transpose_sinking::TSUnaryBackward : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("TSUnaryBackwardMultiConsumers", "0");
+    OPENVINO_MATCHER_PASS_RTTI("TSUnaryBackward");
     TSUnaryBackward();
 };
diff --git a/src/common/transformations/include/transformations/transpose_sinking/ts_unsqueeze.hpp b/src/common/transformations/include/transformations/transpose_sinking/ts_unsqueeze.hpp
index 1ee195624cb801..60e5f8f7893961 100644
--- a/src/common/transformations/include/transformations/transpose_sinking/ts_unsqueeze.hpp
+++ b/src/common/transformations/include/transformations/transpose_sinking/ts_unsqueeze.hpp
@@ -37,6 +37,6 @@ class ov::pass::transpose_sinking::TSUnsqueezeForward : public ov::pass::transpo
  */
 class ov::pass::transpose_sinking::TSUnsqueezeBackward : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ov::pass::TSUnsqueezeBackward", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ov::pass::TSUnsqueezeBackward");
     TSUnsqueezeBackward();
 };
diff --git a/src/common/transformations/include/transformations/utils/gen_pattern.hpp b/src/common/transformations/include/transformations/utils/gen_pattern.hpp
index 21309e339c959c..976561b4844a17 100644
--- a/src/common/transformations/include/transformations/utils/gen_pattern.hpp
+++ b/src/common/transformations/include/transformations/utils/gen_pattern.hpp
@@ -539,6 +539,11 @@ class AttrSetter : public ov::AttributeVisitor {
             a->set(m_attr_map[name].as_vector());
         } else if (auto a = ov::as_type>(&adapter)) {
             a->set(m_attr_map[name].as_T_vector());
+        } else if (auto a = dynamic_cast<ov::AttributeAdapter<std::shared_ptr<ov::op::util::Variable>>*>(&adapter)) {
+            ov::op::util::VariableInfo var_info;
+            var_info.variable_id = m_attr_map[name].as_string();
+            auto variable = std::make_shared<ov::op::util::Variable>(var_info);
+            a->set(variable);
         } else {
             OPENVINO_THROW("unsupported AttributeAdapter for attribute : ", name);
         }
@@ -896,6 +901,7 @@ struct PatternNode {
     // scalar constant (treated as wildcard for single-element-constant with any rank)
     PatternNode(int v) : node(std::make_shared<ov::op::v0::Constant>(element::from<int>(), Shape({}), v)) {}
     PatternNode(float v) : node(std::make_shared<ov::op::v0::Constant>(element::from<float>(), Shape({}), v)) {}
+    PatternNode(long long v) : node(std::make_shared<ov::op::v0::Constant>(element::from<long long>(), Shape({}), v)) {}
 
     PatternNode(std::initializer_list<float> v, values_info vi = nullptr) {
         node = ConstVector(std::vector<float>(v), vi);
diff --git
a/src/common/transformations/include/transformations/utils/print_model.hpp b/src/common/transformations/include/transformations/utils/print_model.hpp index 0829cd7e320e88..53fa7de51c5eca 100644 --- a/src/common/transformations/include/transformations/utils/print_model.hpp +++ b/src/common/transformations/include/transformations/utils/print_model.hpp @@ -19,6 +19,7 @@ #include "openvino/core/model.hpp" #include "openvino/core/node.hpp" #include "openvino/op/constant.hpp" +#include "openvino/op/util/multi_subgraph_base.hpp" #include "openvino/pass/pass.hpp" #include "transformations/utils/utils.hpp" diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp index 7c22dbdfeac53d..5abe0b5b8c87e3 100644 --- a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp @@ -31,7 +31,7 @@ namespace ov { namespace pass { class InitNMSPath : public pass::MatcherPass { public: - OPENVINO_RTTI("InitNMSPath", "0"); + OPENVINO_MATCHER_PASS_RTTI("InitNMSPath"); InitNMSPath() { MATCHER_SCOPE(InitNMSPath); auto nms_pattern = pattern::wrap_type(); diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 185ae84ec83642..23fbf882024bdc 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -65,6 +65,7 @@ #include "transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp" #include "transformations/common_optimizations/reshape_sequence_fusion.hpp" #include "transformations/common_optimizations/ric_fusion.hpp" +#include "transformations/common_optimizations/sdpa_fusion.hpp" #include "transformations/common_optimizations/select_with_one_value_condition.hpp" #include "transformations/common_optimizations/sequence_fusion.hpp" #include "transformations/common_optimizations/shared_ops_optimization.hpp" @@ -229,6 +230,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr ADD_MATCHER(common_fusions, ConvertTensorIteratorToSequence) ADD_MATCHER(common_fusions, SplitConcatPairToInterpolateFusion, m_use_shapes) ADD_MATCHER(common_fusions, ConvolutionToGroupConvolutionFusion) + ADD_MATCHER(common_fusions, SDPAFusion) if (m_use_shapes) { ADD_MATCHER(common_fusions, NearestNeighborUpsamplingFusion) } diff --git a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp index 4e9715883ec9f8..1e6d7caec39ac0 100644 --- a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp @@ -224,6 +224,7 @@ void add_node_with_inputs_to_vector(const std::shared_ptr& node, NodeV } // namespace class SplitConcat : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::init::SplitConcat"); SplitConcat(NodeVector& nodes_to_fuse) { MATCHER_SCOPE(SplitConcat); auto split_p = pattern::wrap_type(); @@ -280,6 +281,7 @@ class SplitConcat : 
public ov::pass::MatcherPass { class Gather : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::init::Gather"); Gather(NodeVector& nodes_to_fuse) { MATCHER_SCOPE(Gather); auto input_p = pattern::any_input(pattern::has_static_rank()); @@ -341,6 +343,7 @@ namespace prop { class Binary : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::prop::Binary"); Binary() { MATCHER_SCOPE(Binary); auto pattern_root = pattern::wrap_type(); @@ -426,6 +429,7 @@ class Binary : public ov::pass::MatcherPass { class Convolution : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::prop::Convolution"); Convolution() { MATCHER_SCOPE(Convolution); auto input_p = pattern::any_input(ric_attr::has>); @@ -448,6 +452,7 @@ class Convolution : public ov::pass::MatcherPass { class GroupConvolution : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::prop::GroupConvolution"); GroupConvolution() { MATCHER_SCOPE(GroupConvolution); auto input_p = pattern::any_input(ric_attr::has>); @@ -504,6 +509,7 @@ class GroupConvolution : public ov::pass::MatcherPass { class ShapeOf : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::prop::ShapeOf"); ShapeOf() { MATCHER_SCOPE(ShapeOf); auto pattern_root = pattern::wrap_type(); @@ -520,6 +526,7 @@ class ShapeOf : public ov::pass::MatcherPass { class PassThrough : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::prop::PassThrough"); PassThrough() { MATCHER_SCOPE(PassThrough); auto pattern_root = pattern::wrap_type>); @@ -570,6 +578,7 @@ class Transpose : public ov::pass::MatcherPass { class Unsupported : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::prop::Unsupported"); Unsupported() { MATCHER_SCOPE(Unsupported); auto pattern_root = pattern::any_input(); @@ -605,6 +614,7 @@ bool need_to_erase_ric(const Output& output) { class InsertReverseInputChannel : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::fuse::InsertReverseInputChannel"); InsertReverseInputChannel(NodeVector& fused_nodes) { MATCHER_SCOPE(InsertReverseInputChannel); auto pattern_root = pattern::any_input(); @@ -628,6 +638,7 @@ class InsertReverseInputChannel : public ov::pass::MatcherPass { class EraseSplitConcat : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::fuse::EraseSplitConcat"); EraseSplitConcat() { MATCHER_SCOPE(EraseSplitConcat); auto input_p = pattern::any_input(); @@ -649,6 +660,7 @@ class EraseSplitConcat : public ov::pass::MatcherPass { class EraseGather : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::fuse::EraseGather"); EraseGather() { MATCHER_SCOPE(EraseGather); auto input_p = pattern::any_input(); @@ -672,6 +684,7 @@ class EraseGather : public ov::pass::MatcherPass { namespace back_prop { class Binary : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::back_prop::Binary"); Binary() { MATCHER_SCOPE(Binary); auto fake_quantize_pattern = @@ -755,6 +768,7 @@ class Binary : public ov::pass::MatcherPass { class ConvertPassThrough : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("pass::back_prop::ConvertPassThrough"); ConvertPassThrough() { MATCHER_SCOPE(ConvertPassThrough); auto pattern_root = pattern::wrap_type(pattern::has_static_rank()); diff --git a/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp 
b/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp new file mode 100644 index 00000000000000..fc581580f70001 --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp @@ -0,0 +1,127 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/sdpa_fusion.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/core/type.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/op/softmax.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "openvino/pass/pattern/op/optional.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/gen_pattern.hpp" + +namespace ov { +namespace pass { + +SDPAFusion::SDPAFusion() { + using namespace ov::pass::pattern; + using namespace ov::gen_pattern; + + auto q = makePattern(ov::Rank(4)); + auto k = makePattern(ov::Rank(4)); + auto v = makePattern(ov::Rank(4)); + auto mask = makePattern(); + + auto k_transpose_order = pattern::wrap_type([](const Output& node) { + auto axis_order = + std::dynamic_pointer_cast(node.get_node_shared_ptr())->cast_vector(); + return axis_order == std::vector{0, 1, 3, 2}; + }); + + auto k_t = pattern::wrap_type({k, k_transpose_order}); + auto qk_nn = makePattern({q, k_t}, {{"transpose_a", false}, {"transpose_b", false}}); + auto qk_nt = makePattern({q, k}, {{"transpose_a", false}, {"transpose_b", true}}); + auto qk = qk_nt | qk_nn; + auto optional_add_mask = optional({qk, mask}); + auto softmax = makePattern({optional_add_mask}, {{"axis", "-1"}}); + auto qkv = makePattern({softmax, v}, {{"transpose_a", false}, {"transpose_b", false}}); + + auto valid_qk_shapes = [](const std::shared_ptr& qk_matmul) { + auto q_pshape = qk_matmul->get_input_partial_shape(0); + auto k_pshape = qk_matmul->get_input_partial_shape(1); + + const size_t q_head_size_idx = 3; + const size_t k_head_size_idx = qk_matmul->get_transpose_b() ? 
3 : 2; + + return q_pshape.size() == 4 && k_pshape.size() == 4 && q_pshape[q_head_size_idx].is_static() && + k_pshape[k_head_size_idx].is_static() && + q_pshape[q_head_size_idx].get_length() == k_pshape[k_head_size_idx].get_length(); + }; + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (transformation_callback(m.get_match_root())) { + return false; + } + + auto q_node = pattern_map.at(q); + auto k_node = pattern_map.at(k); + auto v_node = pattern_map.at(v); + + if (!valid_qk_shapes(ov::as_type_ptr(pattern_map.at(qk).get_node_shared_ptr()))) { + return false; + } + + if (pattern_map.at(qk).get_target_inputs().size() > 1 || + pattern_map.at(softmax).get_target_inputs().size() > 1) { + return false; + } + if (pattern_map.count(optional_add_mask) && (pattern_map.at(optional_add_mask).get_target_inputs().size() > 1 || + pattern_map.at(mask).get_partial_shape().size() > 4)) { + return false; + } + + Output mask_value; + Output mask_input; + if (pattern_map.find(optional_add_mask) != pattern_map.end()) { + mask_value = pattern_map.at(mask); + } else { + mask_value = ov::op::v0::Constant::create(q_node.get_element_type(), ov::Shape{}, std::vector{0}); + } + + if (mask_value.get_partial_shape().size() > 4) { + return false; + } + + if (mask_value.get_partial_shape().rank() == 0 || mask_value.get_partial_shape().rank() == 4) { + mask_input = mask_value; + } else { + size_t rank_diff = q_node.get_partial_shape().size() - mask_value.get_partial_shape().size(); + std::vector axes(rank_diff); + std::iota(axes.begin(), axes.end(), 0); + mask_input = std::make_shared( + mask_value, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{rank_diff}, axes)); + } + + std::shared_ptr scale_node = + ov::op::v0::Constant::create(q_node.get_element_type(), ov::Shape{}, std::vector{1.0f}); + + std::shared_ptr sdpa = std::make_shared(q_node, + k_node, + v_node, + mask_input, + scale_node, + false); + + sdpa->set_friendly_name(m.get_match_root()->get_friendly_name()); + ov::copy_runtime_info(m.get_matched_nodes(), sdpa); + ov::replace_node(m.get_match_root(), sdpa); + + return true; + }; + + auto m = std::make_shared(qkv, "SDPAFusion"); + this->register_matcher(m, callback); +} + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp new file mode 100644 index 00000000000000..3d750fe38a868e --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp @@ -0,0 +1,140 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/sdpa_scale_fusion.hpp" + +#include + +#include "openvino/core/node.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/core/type.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/pass/pattern/op/optional.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "transformations/utils/gen_pattern.hpp" + +namespace ov { +namespace pass { + +SDPAScaleFusion::SDPAScaleFusion() { + using namespace ov::pass::pattern; + using namespace ov::gen_pattern; + + auto q = makePattern(ov::Rank(4)); + auto k = makePattern(ov::Rank(4)); + auto v = makePattern(ov::Rank(4)); + auto mask = makePattern(); + auto sdpa_scale = makeConst({}); 
+ auto scale_q = makePattern("[]") | makePattern("[1]"); + auto scale_k = makePattern("[]") | makePattern("[1]"); + + auto scaled_q = optional({q, scale_q}); + auto scaled_k = optional({k, scale_k}); + auto sdpa_mask_scale = + makePattern({scaled_q, scaled_k, v, mask, sdpa_scale}, + {{"causal", false}}); + auto sdpa_mask = + makePattern({scaled_q, scaled_k, v, mask}, {{"causal", false}}); + auto sdpa_simple = + makePattern({scaled_q, scaled_k, v}, {{"causal", false}}); + auto sdpa = sdpa_simple | sdpa_mask | sdpa_mask_scale; + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (transformation_callback(m.get_match_root())) { + return false; + } + + auto sdpa = m.get_match_root(); + + const bool has_q_scale = pattern_map.count(scaled_q); + const bool has_k_scale = pattern_map.count(scaled_k); + + // Nothing to do + if (!has_q_scale && !has_k_scale) + return false; + + auto prev_scale_value = 1.0f; + auto scale_q_value = 1.0f; + auto scale_k_value = 1.0f; + auto scale_et = sdpa->get_output_element_type(0); + + Output q_input = sdpa->get_input_source_output(0); + Output k_input = sdpa->get_input_source_output(1); + + std::shared_ptr scale_q_node = nullptr; + std::shared_ptr scale_k_node = nullptr; + + if (pattern_map.find(sdpa_scale) != pattern_map.end()) { + auto prev_scale_node = + ov::as_type_ptr(pattern_map.at(sdpa_scale).get_node_shared_ptr()); + prev_scale_value = prev_scale_node->cast_vector()[0]; + scale_et = prev_scale_node->get_output_element_type(0); + } else { + auto head_size = q_input.get_partial_shape()[3]; + if (head_size.is_dynamic()) + return false; + + prev_scale_value = 1.0f / std::sqrt(static_cast(head_size.get_length())); + } + + // Extract scalar scale values for Q and K if those are constant and set new inputs for SDPA + if (has_q_scale) { + scale_q_node = pattern_map.at(scale_q).get_node_shared_ptr(); + if (ov::is_type(scale_q_node)) { + scale_q_value = ov::as_type_ptr(scale_q_node)->cast_vector()[0]; + q_input = pattern_map.at(q); + } + } + if (has_k_scale) { + scale_k_node = pattern_map.at(scale_k).get_node_shared_ptr(); + if (ov::is_type(scale_k_node)) { + scale_k_value = ov::as_type_ptr(scale_k_node)->cast_vector()[0]; + k_input = pattern_map.at(k); + } + } + + Output new_scale_node; + auto new_scale_val = prev_scale_value * scale_q_value * scale_k_value; + + // If new scale is 1 and we have non-constant scale node for either Q or K, then we can make it a scale of SDPA + if (new_scale_val == 1.0f) { + if (has_q_scale && !ov::is_type(scale_q_node)) { + new_scale_node = pattern_map.at(scale_q); + q_input = pattern_map.at(q); + } else if (has_k_scale && !ov::is_type(scale_k_node)) { + new_scale_node = pattern_map.at(scale_k); + k_input = pattern_map.at(k); + } else { + new_scale_node = ov::op::v0::Constant::create(scale_et, ov::Shape{}, std::vector{new_scale_val}); + } + } else { + new_scale_node = ov::op::v0::Constant::create(scale_et, ov::Shape{}, std::vector{new_scale_val}); + } + + OutputVector new_inputs = {q_input, k_input, pattern_map.at(v)}; + if (pattern_map.find(mask) != pattern_map.end()) { + new_inputs.push_back(pattern_map.at(mask)); + } else { + new_inputs.push_back( + ov::op::v0::Constant::create(new_scale_node.get_element_type(), ov::Shape{}, std::vector{0.0f})); + } + + new_inputs.push_back(new_scale_node); + + auto new_sdpa = sdpa->clone_with_new_inputs(new_inputs); + new_sdpa->set_friendly_name(sdpa->get_friendly_name()); + ov::copy_runtime_info(sdpa, new_sdpa); + 
ov::replace_node(sdpa, new_sdpa); + + return true; + }; + + auto m = std::make_shared(sdpa, "SDPAScaleFusion"); + this->register_matcher(m, callback); +} + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp index 2235e87c792b0d..fc667bd23a97b4 100644 --- a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp @@ -116,7 +116,7 @@ const std::shared_ptr propagate_through_ops = */ class PropagateUpMarkToKeepInMixedPrecision : public pass::MatcherPass { public: - OPENVINO_RTTI("PropagateUpMarkToKeepInMixedPrecision", "0"); + OPENVINO_MATCHER_PASS_RTTI("PropagateUpMarkToKeepInMixedPrecision"); PropagateUpMarkToKeepInMixedPrecision() { MATCHER_SCOPE(PropagateUpMarkToKeepInMixedPrecision); @@ -159,7 +159,7 @@ class PropagateUpMarkToKeepInMixedPrecision : public pass::MatcherPass { */ class PropagateDownMarkToKeepInMixedPrecision : public pass::MatcherPass { public: - OPENVINO_RTTI("PropagateDownMarkToKeepInMixedPrecision", "0"); + OPENVINO_MATCHER_PASS_RTTI("PropagateDownMarkToKeepInMixedPrecision"); PropagateDownMarkToKeepInMixedPrecision() { MATCHER_SCOPE(PropagateDownMarkToKeepInMixedPrecision); @@ -197,7 +197,7 @@ class PropagateDownMarkToKeepInMixedPrecision : public pass::MatcherPass { class InitMarkReduceOpPath : public pass::MatcherPass { public: - OPENVINO_RTTI("InitMarkReduceOpPath", "0"); + OPENVINO_MATCHER_PASS_RTTI("InitMarkReduceOpPath"); InitMarkReduceOpPath() { MATCHER_SCOPE(InitMarkReduceOpPath); @@ -217,7 +217,7 @@ class InitMarkReduceOpPath : public pass::MatcherPass { class PropagateMarkUpReduceOpPath : public pass::MatcherPass { public: - OPENVINO_RTTI("PropagateMarkUpReduceOpPath", "0"); + OPENVINO_MATCHER_PASS_RTTI("PropagateMarkUpReduceOpPath"); PropagateMarkUpReduceOpPath() { MATCHER_SCOPE(PropagateMarkUpReduceOpPath); @@ -244,8 +244,8 @@ class PropagateMarkUpReduceOpPath : public pass::MatcherPass { class MarkExp : public pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("MarkExp"); // only exponent that go into ReduceOp should be marked as precision sensitive and kept in f32 - OPENVINO_RTTI("MarkExp", "0"); MarkExp() { MATCHER_SCOPE(MarkExp); auto exp_pattern = pattern::wrap_type(); @@ -288,7 +288,7 @@ class MarkExpInReduceOpPath : public BackwardGraphRewrite { */ class MarkDivWithEps : public MatcherPass { public: - OPENVINO_RTTI("MarkDivWithEps", "0"); + OPENVINO_MATCHER_PASS_RTTI("MarkDivWithEps"); MarkDivWithEps() { MATCHER_SCOPE(MarkDivWithEps); @@ -367,7 +367,7 @@ class MarkDivWithEps : public MatcherPass { class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass { public: - OPENVINO_RTTI("DisableMarkingForQuantizedNodes", "0"); + OPENVINO_MATCHER_PASS_RTTI("PropagateDownDisableSensitivityForQuantized"); PropagateDownDisableSensitivityForQuantized() { MATCHER_SCOPE(PropagateDownDisableSensitivityForQuantized); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp b/src/common/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp index 9f3b6b976d14df..119816266ffdc4 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp +++ 
b/src/common/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp @@ -1319,6 +1319,7 @@ ov::pass::ConvertLoopWithSlicedInputConcatOutputToLSTMSequence::ConvertLoopWithS class EliminateGatherWithRange : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("EliminateGatherWithRange"); EliminateGatherWithRange() { using namespace ov; using namespace ov::pass; diff --git a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp new file mode 100644 index 00000000000000..7f0a44df6a151d --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp @@ -0,0 +1,76 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/fake_convert_decomposition.hpp" + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/fake_convert.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" + +ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() { + MATCHER_SCOPE(FakeConvertDecomposition); + auto data = pattern::any_input(); + + auto fake_convert = ov::pass::pattern::wrap_type(); + + matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + const auto fake_convert_node = + ov::as_type_ptr(pattern_to_output.at(fake_convert).get_node_shared_ptr()); + + if (fake_convert_node == nullptr || transformation_callback(fake_convert_node)) { + return false; + } + + Output data{fake_convert_node->input_value(0)}; + const Output input_scale{fake_convert_node->input_value(1)}; + auto input_type = data.get_element_type(); + + ov::pass::NodeRegistry decomp_ops; + if (input_type != input_scale.get_element_type()) { + input_type = input_scale.get_element_type(); + data = std::make_shared(data, input_type); + data = decomp_ops.add(data.get_node_shared_ptr()); + } + + std::shared_ptr result; + const auto scale = decomp_ops.make(data, input_scale); + if (fake_convert_node->get_input_size() == 2) { + const auto downconvert = + decomp_ops.make(scale, fake_convert_node->get_destination_element_type()); + const auto upconvert = decomp_ops.make(downconvert, input_type); + + result = decomp_ops.make(upconvert, input_scale); + } else { + const Output input_shift{fake_convert_node->input_value(2)}; + const auto shift = decomp_ops.make(scale, input_shift); + + const auto downconvert = + decomp_ops.make(shift, fake_convert_node->get_destination_element_type()); + const auto upconvert = decomp_ops.make(downconvert, input_type); + + const auto deshift = decomp_ops.make(upconvert, input_shift); + result = decomp_ops.make(deshift, input_scale); + } + + if (result->get_output_element_type(0) != fake_convert_node->get_output_element_type(0)) { + result = decomp_ops.make(result, fake_convert_node->get_output_element_type(0)); + } + + result->set_friendly_name(m.get_match_root()->get_friendly_name()); + ov::copy_runtime_info(fake_convert_node, decomp_ops.get()); + ov::replace_node(m.get_match_root(), result); + return true; + }; + + auto m = std::make_shared(fake_convert, matcher_name); + register_matcher(m, callback); +} 
diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp index a72a49fb4832eb..397746c75bb84d 100644 --- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp +++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/position_ids_replacer.cpp @@ -7,11 +7,18 @@ #include "openvino/cc/pass/itt.hpp" #include "openvino/op/gather.hpp" #include "openvino/op/matmul.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/unsqueeze.hpp" #include "openvino/pass/pattern/op/optional.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/utils/utils.hpp" using namespace ov::op; +using namespace ov::pass::pattern; // TODO: Instead of using the following transformation that matches quite a specific place in a model graph in case when // position_ids parameter is missing, consider replacing always existing attention_mask parameter with a sub-graph using @@ -19,25 +26,90 @@ using namespace ov::op; ov::pass::PositionIDsReplacer::PositionIDsReplacer(const Output& position_ids) { MATCHER_SCOPE(PositionIDsReplacer); - auto input_ids = pattern::any_input(); - auto input_embed = pattern::wrap_type({pattern::any_input(), input_ids, pattern::any_input()}); + auto input_ids = any_input(); + auto input_embed = wrap_type({any_input(), input_ids, any_input()}); - auto position_ids_pattern = pattern::any_input(); - auto offset = pattern::wrap_type(); - auto add_offset = pattern::wrap_type({position_ids_pattern, offset}); - auto convert = pattern::wrap_type({add_offset}); - auto position_embed = pattern::wrap_type({pattern::any_input(), convert, pattern::any_input()}); + auto position_ids_pattern = any_input(); + auto offset = wrap_type(); + auto add_offset = wrap_type({position_ids_pattern, offset}); + auto convert = wrap_type({add_offset}); + auto position_embed = wrap_type({any_input(), convert, any_input()}); - auto mul = pattern::optional({input_embed, pattern::any_input()}); + auto mul = optional({input_embed, any_input()}); - auto add = pattern::wrap_type({mul, position_embed}); + auto add = wrap_type({mul, position_embed}); - ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + ov::matcher_pass_callback callback = [=](Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); replace_node(pattern_map.at(position_ids_pattern).get_node_shared_ptr(), position_ids.get_node_shared_ptr()); return true; }; - auto m = std::make_shared(add, matcher_name); + auto m = std::make_shared(add, matcher_name); register_matcher(m, callback); -} \ No newline at end of file +} + +ov::pass::PositionIDsReplacerQwen::PositionIDsReplacerQwen(const Output& position_ids) { + MATCHER_SCOPE(PositionIDsReplacerQwen); + + auto _const = []() { + return wrap_type(); + }; + + // total seq len: + auto p_max_context_len = wrap_type(); + auto p_opt_convert = optional(p_max_context_len); + auto p_opt_reshape = optional({p_opt_convert, any_input()}); + + // current seq len: + // it might be present in 2 different ways: + // input_ids -> unsqueeze -> reshape -> convert -> shape_of -> gather + // QKV -> variadic_split(Q or K) -> rope Q/K -> shape_of -> gather + // Probably we can use the symbols to re-use one of these ways. 
+ // Currently, "any_input" is used to cover both places. + auto p_shape_of = wrap_type({any_input()}); + auto p_current_len = wrap_type({p_shape_of, _const(), _const()}); + + auto p_neg_const = wrap_type(); + auto p_neg_mul = wrap_type({p_current_len, p_neg_const}); + // the rotary_emb_cos/rotary_emb_sin are sliced by the total sequence length, e.g. [1,..4096,1,128] + auto p_rotary_emb_sincos = wrap_type(); + auto p_slice_1 = wrap_type({p_rotary_emb_sincos, _const(), p_opt_reshape, _const(), _const()}); + auto p_slice_2 = wrap_type({p_slice_1, p_neg_mul, _const(), _const(), _const()}); + + ov::matcher_pass_callback callback = [=](Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto max_context_len = pattern_map.at(p_max_context_len).get_node_shared_ptr(); + if (max_context_len->get_friendly_name() != "max_context_len") { + return false; + } + auto rotary_emb_sincos = pattern_map.at(p_rotary_emb_sincos).get_node_shared_ptr(); + auto slice_1 = pattern_map.at(p_slice_1).get_node_shared_ptr(); + auto slice_2 = pattern_map.at(p_slice_2).get_node_shared_ptr(); + + auto axis = v0::Constant::create(element::i64, Shape{}, {1}); + // In case of PagedAttention (continuous batching), the rotary_emb_cos/rotary_emb_sin + // are not used in sequential order, so we need position_ids to gather the expected values. + auto gather = std::make_shared(slice_1->input_value(0), position_ids, axis); + gather->set_friendly_name(slice_2->get_friendly_name()); + gather->validate_and_infer_types(); + + auto pshape = rotary_emb_sincos->get_output_partial_shape(0); + if (pshape.rank().is_dynamic() || pshape.rank().get_length() != 4) { + return false; + } + + // PagedAttention expects the following layout for Q,K,V: + // [batch_size_in_tokens, num_kv_heads * head_size] + // so here we reshape the output tensor to move the seq dim (num tokens) to the batch; + // num_kv_heads * head_size is already handled by the StateManagementPattern transformation. + auto head_size = static_cast(pshape[3].get_length()); + auto new_shape = v0::Constant::create(element::i64, Shape{4}, std::vector{-1, 1, 1, head_size}); + auto reshape = std::make_shared(gather, new_shape, false); + replace_node(slice_2, reshape); + return true; + }; + + auto m = std::make_shared(p_slice_2, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp index 36d9d88975b2e0..55d7af822c3857 100644 --- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp +++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.cpp @@ -14,8 +14,9 @@ using namespace ov::op; -ov::pass::PrevSequenceLengthPattern::PrevSequenceLengthPattern(std::shared_ptr prev_max_seq_len, - std::shared_ptr batch_dim) { +ov::pass::PrevSequenceLengthPattern::PrevSequenceLengthPattern(const std::shared_ptr& unsqueezed_input_ids, + const std::shared_ptr& max_context_len, + const std::shared_ptr& position_ids) { MATCHER_SCOPE(PrevSequenceLengthPattern); // The transformation addresses two cases that look similar: (1) previous sequence length, (2) batch size in // kv-cache state In first case it should replace it by prev_max_seq_len. For the second case, connect to batch_dim.
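With the new signature, the pass builds its replacement subgraphs itself instead of receiving prebuilt nodes; in the hunk below this amounts to (a sketch of the intent, using the parameter names from the new signature, op types abbreviated):

    prev_max_seq_len = max_context_len - ShapeOf(unsqueezed_input_ids)[1]  // minus tokens in the current request
    batch_dim        = ShapeOf(position_ids)                               // fallback for the batch-size case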
@@ -40,8 +41,16 @@ ov::pass::PrevSequenceLengthPattern::PrevSequenceLengthPattern(std::shared_ptrget_output_element_type(0); std::shared_ptr replacement; if (kv_init_shape[axis].is_static() && kv_init_shape[axis].get_length() == 0) { + auto cur_seq_len = std::make_shared(std::make_shared(unsqueezed_input_ids), + v0::Constant::create(element::i64, Shape{}, {1}), + v0::Constant::create(element::i64, Shape{}, {0})); + auto cur_seq_len_i32 = std::make_shared(cur_seq_len, element::i32); + auto prev_max_seq_len = std::make_shared(max_context_len, cur_seq_len_i32); replacement = prev_max_seq_len; } else { + // it is not always required, so will be disposed if not needed + auto batch_dim = std::make_shared(position_ids); + // assumption that any other axis should point to batch dimension, precise reasoning is too complex // TODO: provide more reliable check replacement = batch_dim; diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp index b55c3d73316120..a36085c34237a4 100644 --- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp +++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp @@ -437,6 +437,7 @@ ov::pass::StateManagementPattern::StateManagementPattern(ParameterVector& kv_par parameters_to_remove.push_back(param); } + pa_transpose->set_friendly_name(sdpa_node->get_friendly_name()); replace_node(m.get_match_root(), pa_transpose); return true; }; diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp index 18387d5ca1ae04..cbf9426a0c82c5 100644 --- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp +++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/total_sequence_length_pattern.cpp @@ -6,27 +6,49 @@ #include "openvino/cc/pass/itt.hpp" #include "openvino/core/validation_util.hpp" +#include "openvino/op/add.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/gather.hpp" +#include "openvino/op/reshape.hpp" #include "openvino/op/shape_of.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "openvino/pass/pattern/op/optional.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/utils/utils.hpp" using namespace ov::op; +using namespace ov::pass::pattern; + +namespace { + +void align_replacement(std::shared_ptr& replacement, + const ov::PartialShape& required_shape, + ov::element::Type target_type) { + if (replacement->get_output_element_type(0) != target_type) { + replacement = std::make_shared(replacement, target_type); + } + + if (replacement->get_output_partial_shape(0) != required_shape && required_shape.rank().is_static()) { + replacement = ov::op::util::reshapeTo(replacement, ov::Shape(required_shape.rank().get_length(), 1)); + } +} + +} // namespace ov::pass::TotalSequenceLengthPattern::TotalSequenceLengthPattern( const std::shared_ptr& max_context_len) { MATCHER_SCOPE(TotalSequenceLengthPattern); - auto kv_past = pattern::wrap_type({pattern::any_input()}); - auto kv_gather = pattern::wrap_type({kv_past, pattern::any_input(), pattern::any_input()}); - auto kv_current = pattern::any_input(); - auto kv_concat = 
pattern::wrap_type({kv_gather, kv_current}); - auto kv_shape = pattern::wrap_type({kv_concat}); - auto gather_idx_label = pattern::wrap_type(); - auto seq = pattern::wrap_type({kv_shape, gather_idx_label, pattern::any_input()}); + auto kv_past = wrap_type({any_input()}); + auto kv_gather = wrap_type({kv_past, any_input(), any_input()}); + auto kv_current = any_input(); + auto kv_concat = wrap_type({kv_gather, kv_current}); + auto kv_shape = wrap_type({kv_concat}); + auto gather_idx_label = wrap_type(); + auto seq = wrap_type({kv_shape, gather_idx_label, any_input()}); - ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + ov::matcher_pass_callback callback = [=](Matcher& m) { // TODO: Check that seq has axis that really takes sequence len but not any other dimension -- // use symbolic infra or look at the constant input const auto& pattern_map = m.get_pattern_value_map(); @@ -71,16 +93,8 @@ ov::pass::TotalSequenceLengthPattern::TotalSequenceLengthPattern( if (concat_axis_to_compare == gather_idx_to_compare) { auto target_type = gather->get_output_element_type(0); - - if (replacement->get_output_element_type(0) != target_type) { - replacement = std::make_shared(replacement, target_type); - } - auto required_shape = gather->get_output_partial_shape(0); - - if (replacement->get_output_partial_shape(0) != required_shape && required_shape.rank().is_static()) { - replacement = op::util::reshapeTo(replacement, Shape(required_shape.rank().get_length(), 1)); - } + align_replacement(replacement, required_shape, target_type); } else { // TODO: change in the future when we start supporting dynamic shapes here replacement = ov::util::get_constant_from_source(gather->output(0)); @@ -94,6 +108,41 @@ ov::pass::TotalSequenceLengthPattern::TotalSequenceLengthPattern( return true; }; - auto m = std::make_shared(seq, matcher_name); + auto m = std::make_shared(seq, matcher_name); + register_matcher(m, callback); +} + +ov::pass::TotalSequenceLengthPatternQwen::TotalSequenceLengthPatternQwen( + const std::shared_ptr& max_context_len) { + MATCHER_SCOPE(TotalSequenceLengthPatternQwen); + + auto p_input_ids = wrap_type(); + auto p_unsqueeze = wrap_type({p_input_ids, any_input()}); + auto p_opt_reshape_1 = optional({p_unsqueeze, any_input()}); + auto p_opt_convert_1 = optional(p_opt_reshape_1); + auto p_kv_shape_current = wrap_type({p_opt_convert_1}); + auto p_seq_current = wrap_type({p_kv_shape_current, any_input(), any_input()}); + auto p_opt_convert_2 = optional(p_seq_current); + + auto p_max_context_len = wrap_type(); + auto p_prev_max_seq_len = wrap_type({p_max_context_len, any_input()}); + auto p_opt_convert_3 = optional(p_prev_max_seq_len); + auto p_opt_reshape_2 = optional({p_opt_convert_3, any_input()}); + auto p_total_seq = wrap_type({p_opt_convert_2, p_opt_reshape_2}); + + ov::matcher_pass_callback callback = [=](Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto total_seq = pattern_map.at(p_total_seq).get_node_shared_ptr(); + std::shared_ptr replacement = max_context_len; + + auto target_type = total_seq->get_output_element_type(0); + auto required_shape = total_seq->get_output_partial_shape(0); + align_replacement(replacement, required_shape, target_type); + + replace_node(total_seq, replacement); + return true; + }; + + auto m = std::make_shared(p_total_seq, matcher_name); register_matcher(m, callback); -} \ No newline at end of file +} diff --git a/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp 
b/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp new file mode 100644 index 00000000000000..52c10ba5967bd8 --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp @@ -0,0 +1,234 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/softmax.hpp" +#include "openvino/op/transpose.hpp" + +using namespace testing; +using namespace ov::pass; +using namespace ov; + +TEST_F(TransformationTestsF, SDPAFusionTest1) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{1.0f}); + const auto mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa = std::make_shared(query, + key, + value, + mask_const, + scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest2) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto mask_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{0.0f}); + const auto sdpa = std::make_shared(query, + key, + value, + mask_const, + scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest3) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto 
casual = false; + { + const auto key_t = + std::make_shared(key, + op::v0::Constant::create(element::i64, Shape{4}, {0, 1, 3, 2})); + const auto qk = std::make_shared(query, key_t, false, false); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto mask_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{0.0f}); + const auto sdpa = std::make_shared(query, + key, + value, + mask_const, + scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest4) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, 32, -1}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + { + const auto qk = std::make_shared(query, key, false, false); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + model_ref = model->clone(); + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest5) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + const PartialShape attention_mask_shape{1, 32, -1, -1}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto mask = std::make_shared(element::f16, attention_mask_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto mask_add = std::make_shared(qk, mask); + const auto softmax = std::make_shared(mask_add, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value, mask}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto sdpa = + std::make_shared(query, key, value, mask, scale_const, casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, mask}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest6) { + const PartialShape query_shape{1, 32, 10, 32}; + const PartialShape key_shape{1, 32, 10, 32}; + const PartialShape value_shape{1, 32, 10, 32}; + const PartialShape attention_mask_shape{1, 1, 10, 10}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const 
auto value = std::make_shared(element::f16, value_shape); + const auto mask = std::make_shared(element::f16, attention_mask_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto mask_add = std::make_shared(qk, mask); + const auto softmax = std::make_shared(mask_add, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value, mask}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto sdpa = + std::make_shared(query, key, value, mask, scale_const, casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, mask}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest7) { + const PartialShape query_shape{1, 8, -1, 32}; + const PartialShape key_shape{-1, 1, 8, 32}; + const PartialShape value_shape{1, 8, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + { + const auto key_t = + std::make_shared(key, + op::v0::Constant::create(element::i64, Shape{4}, {1, 2, 3, 0})); + const auto qk = std::make_shared(query, key_t, false, false); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } +} diff --git a/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp new file mode 100644 index 00000000000000..f922f030a9c43b --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp @@ -0,0 +1,228 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" + +using namespace testing; +using namespace ov::pass; +using namespace ov; + +TEST_F(TransformationTestsF, SDPAScaleFusionTest1) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto v_scaled = std::make_shared(value, scale_const); + const auto casual = false; + { + const auto q_scaled = std::make_shared(query, scale_const); + const auto k_scaled = std::make_shared(key, scale_const); + const auto sdpa = + std::make_shared(q_scaled, k_scaled, v_scaled, casual); + + model = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto new_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto 
new_scale_const = + ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{64.0f / std::sqrt(32.0f)}); + const auto sdpa = std::make_shared(query, + key, + v_scaled, + new_mask_const, + new_scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAScaleFusionTest2) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{2.0f}); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto v_scaled = std::make_shared(value, scale_const); + const auto casual = false; + { + const auto q_scaled = std::make_shared(query, scale_const); + const auto k_scaled = std::make_shared(key, scale_const); + const auto sdpa = std::make_shared(q_scaled, + k_scaled, + v_scaled, + sdpa_mask_const, + sdpa_scale_const, + casual); + + model = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto new_scale_const = + ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{128.0f}); + const auto sdpa = std::make_shared(query, + key, + v_scaled, + sdpa_mask_const, + new_scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAScaleFusionTest3) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{2.0f}); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto v_scaled = std::make_shared(value, scale_const); + const auto casual = false; + { + const auto q_scaled = std::make_shared(query, scale_const); + const auto sdpa = std::make_shared(q_scaled, + key, + v_scaled, + sdpa_mask_const, + sdpa_scale_const, + casual); + + model = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto new_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{16.0f}); + const auto sdpa = std::make_shared(query, + key, + v_scaled, + sdpa_mask_const, + new_scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + 
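+    // Expected folded scale: 8 (Q multiplier) * 2 (explicit SDPA scale) = 16.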
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAScaleFusionTest4) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{2.0f}); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto scale_dyn = std::make_shared(element::f32, ov::Shape{}); + const auto v_scaled = std::make_shared(value, scale_const); + const auto casual = false; + const auto q_scaled = std::make_shared(query, scale_dyn); + { + const auto k_scaled = std::make_shared(key, scale_const); + const auto sdpa = std::make_shared(q_scaled, + k_scaled, + v_scaled, + sdpa_mask_const, + sdpa_scale_const, + casual); + + model = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn}); + manager.register_pass(); + } + + { + const auto new_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{16.0f}); + const auto sdpa = std::make_shared(q_scaled, + key, + v_scaled, + sdpa_mask_const, + new_scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAScaleFusionTest5) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{1.0f}); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{1.0f}); + const auto scale_dyn = std::make_shared(element::f32, ov::Shape{}); + const auto v_scaled = std::make_shared(value, scale_const); + const auto casual = false; + { + const auto q_scaled = std::make_shared(query, scale_dyn); + const auto k_scaled = std::make_shared(key, scale_const); + const auto sdpa = std::make_shared(q_scaled, + k_scaled, + v_scaled, + sdpa_mask_const, + sdpa_scale_const, + casual); + + model = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn}); + manager.register_pass(); + } + + { + const auto sdpa = std::make_shared(query, + key, + v_scaled, + sdpa_mask_const, + scale_dyn, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} diff --git a/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp 
b/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp new file mode 100644 index 00000000000000..33b167ace11e24 --- /dev/null +++ b/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp @@ -0,0 +1,149 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/fake_convert_decomposition.hpp" + +#include + +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset13.hpp" + +using namespace ov; + +using FakeConvertDecompositionParams = std::tuple; // default shift + +class FakeConvertDecompositionTest : public ov::test::TestsCommon, + public ::testing::WithParamInterface { +public: + static std::string getTestCaseName(::testing::TestParamInfo obj) { + FakeConvertDecompositionParams params = obj.param; + + Shape data_shape, scale_shape, shift_shape; + element::Type_t data_prec, dst_prec; + bool default_shift; + std::tie(data_shape, scale_shape, shift_shape, data_prec, dst_prec, default_shift) = params; + + std::ostringstream result; + result << "dataShape=" << ov::test::utils::vec2str(data_shape) << "_"; + result << "scaleShape=" << ov::test::utils::vec2str(scale_shape) << "_"; + result << "shiftShape=" << ov::test::utils::vec2str(shift_shape) << "_"; + result << "dataPrecision=" << element::Type(data_prec) << "_"; + result << "destinationPrecision=" << element::Type(dst_prec) << "_"; + if (default_shift) + result << "defaultShift=true"; + else + result << "defaultShift=false"; + return result.str(); + } +}; + +TEST_P(FakeConvertDecompositionTest, CompareFunctions) { + FakeConvertDecompositionParams params = this->GetParam(); + + Shape data_shape, scale_shape, shift_shape; + element::Type_t data_prec, dst_prec; + bool default_shift; + std::tie(data_shape, scale_shape, shift_shape, data_prec, dst_prec, default_shift) = params; + + std::shared_ptr model(nullptr); + { + const auto data = std::make_shared(data_prec, PartialShape(data_shape)); + const auto scale = std::make_shared(data_prec, scale_shape); + const auto shift = std::make_shared(data_prec, shift_shape); + + const auto fake_convert = default_shift ? 
std::make_shared(data, scale, dst_prec) + : std::make_shared(data, scale, shift, dst_prec); + model = std::make_shared(NodeVector{fake_convert}, ParameterVector{data}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(model); + + OV_ASSERT_NO_THROW(check_rt_info(model)); + } + + std::shared_ptr model_ref(nullptr); + { + const auto input_data = std::make_shared(data_prec, PartialShape(data_shape)); + const auto input_scale = std::make_shared(data_prec, scale_shape); + const auto input_shift = std::make_shared(data_prec, shift_shape); + ParameterVector params; + params.push_back(input_data); + std::shared_ptr data = input_data; + + std::shared_ptr result; + const auto scale = std::make_shared(data, input_scale); + if (default_shift) { + const auto downconvert = std::make_shared(scale, dst_prec); + const auto upconvert = std::make_shared(downconvert, data_prec); + + result = std::make_shared(upconvert, input_scale); + } else { + const auto shift = std::make_shared(scale, input_shift); + + const auto downconvert = std::make_shared(shift, dst_prec); + const auto upconvert = std::make_shared(downconvert, data_prec); + + const auto deshift = std::make_shared(upconvert, input_shift); + result = std::make_shared(deshift, input_scale); + } + + model_ref = std::make_shared(NodeVector{result}, params); + } + + const auto res = compare_functions(model, model_ref); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector data_precisions = {element::Type_t::f32, + element::Type_t::f16, + element::Type_t::bf16}; + +const std::vector destination_precisions = {element::Type_t::f8e4m3, element::Type_t::f8e5m2}; + +const std::vector default_shift = {true, false}; + +const auto simple_fake_convert_params = ::testing::Combine(::testing::Values(Shape{2, 3, 4, 5}), + ::testing::Values(Shape{1}), + ::testing::Values(Shape{1}), + ::testing::ValuesIn(data_precisions), + ::testing::ValuesIn(destination_precisions), + ::testing::ValuesIn(default_shift)); + +const auto broadcast_fake_convert_params = ::testing::Combine(::testing::Values(Shape{2, 3, 4, 5}), + ::testing::Values(Shape{2, 3, 1, 1}), + ::testing::Values(Shape{2, 3, 1, 1}), + ::testing::ValuesIn(data_precisions), + ::testing::ValuesIn(destination_precisions), + ::testing::ValuesIn(default_shift)); + +const auto elementwise_fake_convert_params = ::testing::Combine(::testing::Values(Shape{2, 3, 4, 5}), + ::testing::Values(Shape{2, 3, 4, 5}), + ::testing::Values(Shape{2, 3, 4, 5}), + ::testing::ValuesIn(data_precisions), + ::testing::ValuesIn(destination_precisions), + ::testing::ValuesIn(default_shift)); + +INSTANTIATE_TEST_SUITE_P(SimpleFakeConvert_Decomposition, + FakeConvertDecompositionTest, + simple_fake_convert_params, + FakeConvertDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(BroadcastFakeConvert_Decomposition, + FakeConvertDecompositionTest, + broadcast_fake_convert_params, + FakeConvertDecompositionTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(ElementwiseFakeConvert_Decomposition, + FakeConvertDecompositionTest, + elementwise_fake_convert_params, + FakeConvertDecompositionTest::getTestCaseName); diff --git a/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp b/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp new file mode 100644 index 00000000000000..840309993c939a --- /dev/null +++ b/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp @@ -0,0 +1,618 @@ +// Copyright (C) 
2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/pass/sdpa_to_paged_attention.hpp" + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/ops.hpp" +#include "openvino/op/paged_attention.hpp" +#include "openvino/op/power.hpp" +#include "openvino/op/reduce_mean.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/sqrt.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp" +#include "transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp" +#include "transformations/utils/gen_pattern.hpp" +#include "transformations/utils/print_model.hpp" + +using namespace ov; +using namespace std; +using namespace testing; +using namespace ov::op; +using namespace ov::gen_pattern; + +namespace { + +// Constants and Parameters attributes: +auto el_type_i64 = std::pair({"element_type", "i64"}); +auto el_type_i32 = std::pair({"element_type", "i32"}); +auto el_type_f32 = std::pair({"element_type", "f32"}); + +// Convert ops attributes: +auto dest_type_i64 = std::pair({"destination_type", "i64"}); +auto dest_type_f32 = std::pair({"destination_type", "f32"}); +auto dest_type_f16 = std::pair({"destination_type", "f16"}); + +// Other attributes: +auto numpy_broadcast = std::pair({"auto_broadcast", "numpy"}); +auto special_zero_true = std::pair({"special_zero", true}); + +auto single_val = [](int rank, float val) { + return makeConst(element::f32, ov::Shape{std::vector(rank, 1)}, {val}); +}; + +ov::ParameterVector nodes_to_params(const ov::NodeVector& node_vec) { + ov::ParameterVector params; + params.reserve(node_vec.size()); + for (const auto& node : node_vec) { + params.push_back(ov::as_type_ptr(node)); + } + return params; +} + +enum QKV : int { Q = 0, K = 1, V = 2 }; +vector MOCK_VALUE = {1}; + +// original weights = 151936, attention_weights = 12288 +#define WEIGHTS 1024 +#define ATTENTION_WEIGHTS 512 + +class Qwen7bChatSDPA { +public: + static std::shared_ptr gen_embeddings(const std::shared_ptr& input_ids) { + auto view_reshape = makeOP({input_ids, {-1, 0}}, {special_zero_true}); + auto input_ids_i64 = makeOP({view_reshape}, {dest_type_i64}); + + auto weights = makeConst(element::u8, {WEIGHTS, 4096}, MOCK_VALUE); + auto weights_fp16 = makeOP({weights}, {dest_type_f16}); + auto zero_point = makeConst(element::u8, {WEIGHTS, 1}, MOCK_VALUE); + auto zero_point_fp16 = makeOP({zero_point}, {dest_type_f16}); + auto zero_point_subtract = makeOP({weights_fp16, zero_point_fp16}, {numpy_broadcast}); + + auto scale = makeConst(element::f16, {WEIGHTS, 1}, MOCK_VALUE); + auto mul_scale = makeOP({zero_point_subtract, scale}, {numpy_broadcast}); + auto fq_weights = makeOP({mul_scale}, {dest_type_f32}); + + return makeOP({fq_weights, input_ids_i64, 0}, {{"batch_dims", 0}}); + } + + static std::shared_ptr gen_attention_weights() { + auto weights = makeConst(element::u8, {ATTENTION_WEIGHTS, 4096}, 
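// The mock attention weights follow the standard compressed-weight
// pattern: a u8 constant is converted to f16, the zero point is
// subtracted, the per-channel scale is applied, and the result is
// converted to f32. A rough sketch with the plain opset classes (the
// test itself builds the same graph through the makeOP/makeConst
// helpers from gen_pattern.hpp):
//
//   auto w   = op::v0::Constant::create(element::u8, Shape{rows, cols}, data);
//   auto w16 = std::make_shared<op::v0::Convert>(w, element::f16);
//   auto dq  = std::make_shared<op::v1::Subtract>(w16, zero_point_f16);
//   auto sc  = std::make_shared<op::v1::Multiply>(dq, scale_f16);
//   auto w32 = std::make_shared<op::v0::Convert>(sc, element::f32);
//
// (rows, cols, data, zero_point_f16 and scale_f16 are placeholders.)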
MOCK_VALUE); + auto weights_f16 = makeOP({weights}, {dest_type_f16}); + + auto zero_points = makeConst(element::u8, {ATTENTION_WEIGHTS, 1}, MOCK_VALUE); + auto zero_points_f16 = makeOP({zero_points}, {dest_type_f16}); + auto subtract = makeOP({weights_f16, zero_points_f16}, {numpy_broadcast}); + + auto scale = makeConst(element::f16, {ATTENTION_WEIGHTS, 1}, MOCK_VALUE); + auto mul = makeOP({subtract, scale}, {numpy_broadcast}); + return makeOP({mul}, {dest_type_f32}); + } + + static std::shared_ptr gen_qkv_proj(const std::shared_ptr& embeddings) { + auto _const_0 = single_val(/*rank*/ 3, /*val*/ 2); + auto pow = makeOP({embeddings, _const_0}, {numpy_broadcast}); + auto mean = makeOP({pow, {-1}}, {{"keep_dims", true}}); + + auto _const_1 = single_val(/*rank*/ 3, /*val*/ 1); + auto add = makeOP({mean, _const_1}, {numpy_broadcast}); + auto sqrt = makeOP({add}); + + auto _const_2 = single_val(/*rank*/ 3, /*val*/ 1); + auto div = makeOP({_const_2, sqrt}, {numpy_broadcast, {"m_pythondiv", true}}); + auto mul_0 = makeOP({embeddings, div}, {numpy_broadcast}); + + auto _const_3 = makeConst(element::f32, {1, 1, 4096}, MOCK_VALUE); + auto mul_1 = makeOP({mul_0, _const_3}, {numpy_broadcast}); + auto attention_weights = gen_attention_weights(); + auto linear_matmul = + makeOP({mul_1, attention_weights}, {{"transpose_a", false}, {"transpose_b", true}}); + + auto _const_4 = makeConst(element::f32, {1, 1, ATTENTION_WEIGHTS}, MOCK_VALUE); + auto linear_add = makeOP({linear_matmul, _const_4}, {numpy_broadcast}); + return makeOP({linear_add, 2, {4096, 4096, -1}}); + } + + static std::shared_ptr gen_cache(const std::shared_ptr& input_ids, + const std::shared_ptr& beam_idx, + const std::string& name) { + auto shape_of = makeOP({input_ids}, {{"output_type", "i64"}}); + auto gather = makeOP({shape_of, {0}, 0}, {{"batch_dims", 0}}); + auto concat = makeOP({gather, {0ll}, {32ll}, {128ll}}, {{"axis", 0}}); + auto init_to_read = makeOP({0.000000f, concat}, {{"mode", "numpy"}}); + auto cache = makeOP( + {init_to_read}, + {{"variable_id", name}, {"variable_type", "f32"}, {"variable_shape", PartialShape{DYN, DYN, 32, 128}}}); + return makeOP({cache, beam_idx, 0}, {{"batch_dims", 0}}); + } + + static std::shared_ptr gen_current_len(const std::shared_ptr& input_ids) { + auto shape_of = makeOP({input_ids}, {{"output_type", "i64"}}); + return makeOP({shape_of, {1}, 0}, {{"batch_dims", 0}}); + } + + static std::shared_ptr gen_past_len(const std::shared_ptr& k_cache) { + auto shape_of = makeOP({k_cache}, {{"output_type", "i64"}}); + return makeOP({shape_of, {1}, 0}, {{"batch_dims", 0}}); + } + + static std::shared_ptr gen_total_len(const std::shared_ptr& cur_len, + const std::shared_ptr& past_len) { + return makeOP({cur_len, past_len}, {numpy_broadcast}); + } + + static std::shared_ptr gen_rope(QKV idx, + const std::shared_ptr& qkv_proj, + const std::shared_ptr& head_size, + const std::shared_ptr& sliced_sin, + const std::shared_ptr& sliced_cos) { + auto current_k = makeOP({qkv_proj->output(idx), {0, 0, 32, 128}}, {special_zero_true}); + auto sliced_k = makeOP({current_k, {0}, head_size, {1}, {3}}); + auto mul_1 = makeOP({sliced_k, sliced_cos}, {numpy_broadcast}); + + auto reshape = makeOP({sliced_k, {0, 0, 32, 2, 64}}, {special_zero_true}); + auto split_1 = makeOP({reshape, -2}, {{"num_splits", 2}}); + auto list_unpack_1 = makeOP({split_1->output(1), -2}); + + auto _const = single_val(/*rank*/ 4, /*val*/ 1); + auto mul_2 = makeOP({list_unpack_1, _const}, {numpy_broadcast}); + auto list_unpack_2 = makeOP({split_1->output(0), 
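// Rotate-half RoPE: the 128-wide head dimension is reshaped into two
// halves of 64, the halves are swapped (the second half scaled by the
// mock constant above, standing in for the -1 negation of the real
// graph), concatenated back, and the result feeds x*cos + rotate_half(x)*sin.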
-2}); + auto concat = makeOP({mul_2, list_unpack_2}, {{"axis", -1}}); + + auto mul_3 = makeOP({concat, sliced_sin}, {numpy_broadcast}); + return makeOP({mul_1, mul_3}, {numpy_broadcast}); + } + + static std::shared_ptr gen_rope_emb_sin(const std::shared_ptr& total_seq_len, + const std::shared_ptr& neg_mul, + std::shared_ptr& head_size) { + auto sin = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE); + auto sliced_sin_by_total = makeOP({sin, {0}, total_seq_len, {1}, {1}}); + auto rotary_emb_sin_shape = makeOP({sliced_sin_by_total}, {{"output_type", "i64"}}); + head_size = makeOP({rotary_emb_sin_shape, {3}, 0}, {{"batch_dims", 0}}); + return makeOP({sliced_sin_by_total, neg_mul, {LLONG_MAX}, {1}, {1}}); + } + + static std::shared_ptr gen_rope_emb_cos(const std::shared_ptr& total_seq_len, + const std::shared_ptr& neg_mul) { + auto cos = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE); + auto sliced_cos_by_total = makeOP({cos, {0}, total_seq_len, {1}, {1}}); + return makeOP({sliced_cos_by_total, neg_mul, {LLONG_MAX}, {1}, {1}}); + } + + static std::shared_ptr neg_mul(const std::shared_ptr& current_seq_len) { + return makeOP({current_seq_len, {-1ll}}, {numpy_broadcast}); + } + + static std::shared_ptr gen_V(const std::shared_ptr& cache, const std::shared_ptr& qkv_proj) { + auto v_current = makeOP({qkv_proj->output(2), {0, 0, 32, 128}}, {special_zero_true}); + auto v_total = makeOP({cache, v_current}, {{"axis", 1}}); + return makeOP({v_total, {0, 2, 1, 3}}); + } + + static std::shared_ptr gen_K(const std::shared_ptr& cache, const std::shared_ptr& rope_K) { + auto full_k = makeOP({cache, rope_K}, {{"axis", 1}}); + return makeOP({full_k, {0, 2, 1, 3}}); + } + + static std::shared_ptr gen_Q(const std::shared_ptr& past_seq_len_2, + const std::shared_ptr& total_seq_len_2, + const std::shared_ptr& rope_Q) { + auto _const = makeConst(element::f32, {1, 32767, 1, 1}, MOCK_VALUE); + auto slice = makeOP({_const, past_seq_len_2, total_seq_len_2, {1}, {1}}); + auto mul = makeOP({rope_Q, slice}, {numpy_broadcast}); + return makeOP({mul, {0, 2, 1, 3}}); + } + + static std::shared_ptr gen_total_seq_len_2(const std::shared_ptr& past_k_len, + const std::shared_ptr& rope_k) { + auto shape_rope_k = makeOP({rope_k}, {{"output_type", "i64"}}); + auto cur_len = makeOP({shape_rope_k, {1}, 0}, {{"batch_dims", 0}}); + return makeOP({past_k_len, cur_len}, {numpy_broadcast}); + } + + static std::shared_ptr gen_past_seq_len_2(const std::shared_ptr& total_seq_len, + const std::shared_ptr& rope_q) { + auto shape_rope_q = makeOP({rope_q}, {{"output_type", "i64"}}); + auto cur_len = makeOP({shape_rope_q, {1}, 0}, {{"batch_dims", 0}}); + return makeOP({total_seq_len, cur_len}, {numpy_broadcast}); + } + + static std::shared_ptr gen_attention_mask(const std::shared_ptr& Q_in, + const std::shared_ptr& attention_mask_in, + const std::shared_ptr& total_seq_len) { + auto _const = makeConst(element::boolean, {1, 1, 8192, 8192}, MOCK_VALUE); + auto shape_of_q = makeOP({Q_in}, {{"output_type", "i64"}}); + auto gather = makeOP({shape_of_q, {2}, 0}, {{"batch_dims", 0}}); + auto sub_1 = makeOP({total_seq_len, gather}, {numpy_broadcast}); + auto concat = makeOP({sub_1, {0ll}}, {{"axis", 0}}); + auto broadcast = makeOP({total_seq_len, {2}}, {{"mode", "numpy"}}); + auto slice = makeOP({_const, concat, broadcast, {1, 1}, {2, 3}}); + auto bitwise_not = makeOP({slice}); + + auto _const_1 = single_val(/*rank*/ 4, /*val*/ 1); + auto view_reshape = makeOP({attention_mask_in, {0, 0}}, {special_zero_true}); + auto unsqueeze_0 = 
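// gen_attention_mask fuses the causal and padding masks: the boolean
// triangular constant is sliced to the [past_len, total_len] window and
// inverted (BitwiseNot), the user attention_mask is reshaped/unsqueezed
// to 4D and turned into (1 - mask) * scale, broadcast across the query
// length, and the final Select writes -FLT_MAX wherever attention is
// disallowed.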
makeOP({view_reshape, 1}); + auto unsqueeze_1 = makeOP({unsqueeze_0, 2}); + auto convert_0 = makeOP({unsqueeze_1}, {dest_type_f32}); + + auto _const_2 = single_val(/*rank*/ 4, /*val*/ 1); + auto mul_1 = makeOP({convert_0, _const_2}, {numpy_broadcast}); + auto sub_2 = makeOP({_const_1, mul_1}, {numpy_broadcast}); + + auto _const_3 = single_val(/*rank*/ 4, /*val*/ 1); + auto mul_2 = makeOP({sub_2, _const_3}, {numpy_broadcast}); + auto list_construct = makeOP({{1ll}, {1ll}, gather, {1ll}}, {{"axis", 0}}); + auto expand_broadcast = makeOP({mul_2, list_construct}, {{"mode", "bidirectional"}}); + return makeOP({bitwise_not, -FLT_MAX, expand_broadcast}, {numpy_broadcast}); + } +}; + +class Qwen7bChatPA { +public: + static std::shared_ptr gen_embeddings(const std::shared_ptr& input_ids) { + auto weights = makeConst(element::u8, {WEIGHTS, 4096}, MOCK_VALUE); + auto weights_fp16 = makeOP({weights}, {dest_type_f16}); + + auto zero_point = makeConst(element::u8, {WEIGHTS, 1}, MOCK_VALUE); + auto zero_point_fp16 = makeOP({zero_point}, {dest_type_f16}); + auto sub = makeOP({weights_fp16, zero_point_fp16}, {numpy_broadcast}); + + auto scale = makeConst(element::f16, {WEIGHTS, 1}, MOCK_VALUE); + auto mul = makeOP({sub, scale}, {numpy_broadcast}); + auto mul_fp32 = makeOP({mul}, {dest_type_f32}); + + auto reshape_view = makeOP({input_ids, {-1, 0}}, {special_zero_true}); + auto reshape_view_i64 = makeOP({reshape_view}, {dest_type_i64}); + return makeOP({mul_fp32, reshape_view_i64, 0}, {{"batch_dims", 0}}); + } + + static std::shared_ptr gen_qkv_proj(const std::shared_ptr& embeddings) { + auto _const_0 = makeConst(element::f32, {1, 1, 1}, MOCK_VALUE); + auto pow = makeOP({embeddings, _const_0}, {numpy_broadcast}); + auto mean = makeOP({pow, {-1}}, {{"keep_dims", true}}); + auto _const_1 = makeConst(element::f32, {1, 1, 1}, MOCK_VALUE); + auto add_0 = makeOP({mean, _const_1}, {numpy_broadcast}); + + auto sqrt = makeOP({add_0}); + auto _const_2 = makeConst(element::f32, {1, 1, 1}, MOCK_VALUE); + auto div = makeOP({_const_2, sqrt}, {numpy_broadcast, {"m_pythondiv", true}}); + auto mul_0 = makeOP({embeddings, div}, {numpy_broadcast}); + + auto _const_3 = makeConst(element::f32, {1, 1, 4096}, MOCK_VALUE); + auto mul_1 = makeOP({mul_0, _const_3}, {numpy_broadcast}); + + auto _const_4 = makeConst(element::u8, {ATTENTION_WEIGHTS, 4096}, MOCK_VALUE); + auto convert_0 = makeOP({_const_4}, {dest_type_f16}); + + auto _const_5 = makeConst(element::u8, {ATTENTION_WEIGHTS, 1}, MOCK_VALUE); + auto convert_1 = makeOP({_const_5}, {dest_type_f16}); + auto sub = makeOP({convert_0, convert_1}, {numpy_broadcast}); + + auto _const_6 = makeConst(element::f16, {ATTENTION_WEIGHTS, 1}, MOCK_VALUE); + auto mul_2 = makeOP({sub, _const_6}, {numpy_broadcast}); + auto convert_2 = makeOP({mul_2}, {dest_type_f32}); + auto matmul = makeOP({mul_1, convert_2}, {{"transpose_a", false}, {"transpose_b", true}}); + auto Constant_270 = makeConst(element::f32, {1, 1, ATTENTION_WEIGHTS}, MOCK_VALUE); + auto add_1 = makeOP({matmul, Constant_270}, {numpy_broadcast}); + + return makeOP({add_1, 2, {4096, 4096, -1}}); + } + + static std::shared_ptr gen_rope(QKV idx, + const std::shared_ptr& qkv_proj, + const std::shared_ptr& head_size, + const std::shared_ptr& sin, + const std::shared_ptr& cos) { + auto Q_or_K = makeOP({qkv_proj->output(idx), {0, 0, 32, 128}}, {special_zero_true}); + auto sliced = makeOP({Q_or_K, {0}, head_size, {1}, {3}}); + auto mul_0 = makeOP({sliced, sin}, {numpy_broadcast}); + + auto reshape = makeOP({sliced, {0, 0, 32, 2, 64}}, 
{special_zero_true}); + auto split = makeOP({reshape, -2}, {{"num_splits", 2}}); + auto squeeze_0 = makeOP({split->output(1), -2}); + auto _const_0 = makeConst(element::f32, {1, 1, 1, 1}, {1.000000f}); + auto mul_1 = makeOP({squeeze_0, _const_0}, {numpy_broadcast}); + + auto squeeze_1 = makeOP({split->output(0), -2}); + auto concat = makeOP({mul_1, squeeze_1}, {{"axis", -1}}); + auto mul_2 = makeOP({concat, cos}, {numpy_broadcast}); + return makeOP({mul_0, mul_2}, {numpy_broadcast}); + } + + static std::shared_ptr gen_rope_emb_sin(const std::shared_ptr& max_context_len, + const std::shared_ptr& position_ids, + std::shared_ptr& head_size) { + auto sin = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE); + auto slice_sin = makeOP({sin, position_ids, 1}, {{"batch_dims", 0}}); + + auto slice = makeOP({sin, {0}, max_context_len, {1}, {1}}); + auto shape_of = makeOP({slice}, {{"output_type", "i64"}}); + head_size = makeOP({shape_of, {3}, 0}, {{"batch_dims", 0}}); + + return makeOP({slice_sin, {-1, 1, 1, 128}}, {{"special_zero", false}}); + } + + static std::shared_ptr gen_rope_emb_cos(const std::shared_ptr& max_context_len, + const std::shared_ptr& position_ids) { + auto cos = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE); + auto slice = makeOP({cos, position_ids, 1}, {{"batch_dims", 0}}); + return makeOP({slice, {-1, 1, 1, 128}}, {{"special_zero", false}}); + } + + static std::shared_ptr align_pa_layout(const std::shared_ptr& pa, + const std::shared_ptr& head_size) { + auto shape = makeOP({{0ll}, {1ll}, {-1ll}, head_size}, {{"axis", 0}}); + auto reshaped = makeOP({pa->output(0), shape}, {special_zero_true}); + return makeOP({reshaped, {0, 2, 1, 3}}); + } + + static std::shared_ptr gen_current_len(const std::shared_ptr& rope_K) { + auto shape_of = makeOP({rope_K}, {{"output_type", "i32"}}); + return makeOP({shape_of, {1}, 0ll}, {{"batch_dims", 0}}); + } + + static std::shared_ptr gen_past_len(const std::shared_ptr& input_ids, + const std::shared_ptr& max_context_len) { + auto shape_of = makeOP({input_ids}, {{"output_type", "i64"}}); + auto cur_len = makeOP({shape_of, 1ll, 0ll}, {{"batch_dims", 0}}); + auto cur_len_i32 = makeOP({cur_len}, {{"destination_type", "i32"}}); + + auto past_len = makeOP({max_context_len, cur_len_i32}, {numpy_broadcast}); + auto past_len_i32 = makeOP({past_len}, {{"destination_type", "i32"}}); + return makeOP({past_len_i32, {1}}, {special_zero_true}); + } + + static std::shared_ptr gen_total_len(const std::shared_ptr& cur_len, + const std::shared_ptr& past_len) { + return makeOP({past_len, cur_len}, {numpy_broadcast}); + } + + static std::shared_ptr gen_V(const std::shared_ptr& qkv_proj, std::shared_ptr& head_size) { + auto current_V = makeOP({qkv_proj->output(2), {0, 0, 32, 128}}, {special_zero_true}); + auto gather = makeOP({{0, 2, 1, 3}, {0, 2, 1, 3}, 0ll}, {{"batch_dims", 0}}); + auto transpose = makeOP({current_V, gather}); + + auto shape_of = makeOP({transpose}, {{"output_type", "i64"}}); + auto gather_2 = makeOP({shape_of, -1ll, 0ll}, {{"batch_dims", 0}}); + head_size = makeOP({gather_2, 0}); + + return makeOP({transpose, {0, -1}}, {special_zero_true}); + } + + static std::shared_ptr gen_K(const std::shared_ptr& rope_K) { + auto gather = makeOP({{0, 2, 1, 3}, {0, 2, 1, 3}, 0ll}, {{"batch_dims", 0}}); + auto transpose = makeOP({rope_K, gather}); + return makeOP({transpose, {0, -1}}, {special_zero_true}); + } + + static std::shared_ptr gen_Q(const std::shared_ptr& total_seq_len, + const std::shared_ptr& rope_Q) { + auto _const_1 = 
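// gen_Q for the PagedAttention reference graph: rope_Q is scaled by a
// slice of the same [1, 32767, 1, 1] constant the SDPA graph uses
// (window [past_seq_len, total_seq_len]) and then flattened into the 2D
// [tokens, hidden] layout PagedAttention consumes, via Transpose plus
// Reshape{0, -1}.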
makeConst(element::f32, {1, 32767, 1, 1}, MOCK_VALUE); + auto shape_of = makeOP({rope_Q}, {{"output_type", "i32"}}); + auto current_seq_len = makeOP({shape_of, {1}, 0ll}, {{"batch_dims", 0}}); + auto past_seq_len = makeOP({total_seq_len, current_seq_len}, {numpy_broadcast}); + + auto slice = makeOP({_const_1, past_seq_len, total_seq_len, {1}, {1}}); + auto mul = makeOP({rope_Q, slice}, {numpy_broadcast}); + auto transpose_1 = makeOP({mul, {0, 2, 1, 3}}); + + auto transpose_2 = makeOP({transpose_1, {0, 2, 1, 3}}); + return makeOP({transpose_2, {0, -1}}, {special_zero_true}); + } +}; + +} // namespace + +TEST_F(TransformationTestsF, SDPAToPA_Qwen) { + { + // Inputs to SDPA transformer: + auto beam_idx = makeOP({}, {{"shape", PartialShape{DYN}}, el_type_i64}); + auto position_ids = makeOP({}, {{"shape", PartialShape{DYN, DYN}}, el_type_i64}); + auto attention_mask = makeOP({}, {{"shape", PartialShape{DYN, DYN}}, el_type_i64}); + auto input_ids = makeOP({}, {{"shape", PartialShape{DYN, DYN}}, el_type_i64}); + ParameterVector params = nodes_to_params({position_ids, input_ids, attention_mask, beam_idx}); + + beam_idx->output(0).add_names({"beam_idx"}); + position_ids->output(0).add_names({"position_ids"}); + attention_mask->output(0).add_names({"attention_mask"}); + input_ids->output(0).add_names({"input_ids"}); + + // Embeddings processing: + auto embeddings = Qwen7bChatSDPA::gen_embeddings(input_ids); + auto qkv_proj = Qwen7bChatSDPA::gen_qkv_proj(embeddings); + + // KV cache: + auto k_cache = Qwen7bChatSDPA::gen_cache(input_ids, beam_idx, "K_cache"); + auto v_cache = Qwen7bChatSDPA::gen_cache(input_ids, beam_idx, "V_cache"); + + // Current/past/total Seq lengths calculation: + auto current_seq_len = Qwen7bChatSDPA::gen_current_len(input_ids); + auto past_seq_len = Qwen7bChatSDPA::gen_past_len(k_cache); + auto total_seq_len = Qwen7bChatSDPA::gen_total_len(current_seq_len, past_seq_len); + + // RoPE emb sin/cos init: + auto neg_cur_seq_len = Qwen7bChatSDPA::neg_mul(current_seq_len); + auto head_size = shared_ptr(); + auto rope_emb_sin = Qwen7bChatSDPA::gen_rope_emb_sin(total_seq_len, neg_cur_seq_len, head_size); + auto rope_emb_cos = Qwen7bChatSDPA::gen_rope_emb_cos(total_seq_len, neg_cur_seq_len); + + // RoPE for Q,K inputs: + auto rope_q = Qwen7bChatSDPA::gen_rope(QKV::Q, qkv_proj, head_size, rope_emb_sin, rope_emb_cos); + auto rope_k = Qwen7bChatSDPA::gen_rope(QKV::K, qkv_proj, head_size, rope_emb_sin, rope_emb_cos); + + // Lengths: + auto total_seq_len_2 = Qwen7bChatSDPA::gen_total_seq_len_2(past_seq_len, rope_k); + auto past_seq_len_2 = Qwen7bChatSDPA::gen_past_seq_len_2(total_seq_len_2, rope_q); + + // Q, K, V: + auto Q = Qwen7bChatSDPA::gen_Q(past_seq_len_2, total_seq_len_2, rope_q); + auto K = Qwen7bChatSDPA::gen_K(k_cache, rope_k); + auto V = Qwen7bChatSDPA::gen_V(v_cache, qkv_proj); + + // Attention mask: + auto attention_mask_to_sdpa = Qwen7bChatSDPA::gen_attention_mask(Q, attention_mask, total_seq_len_2); + + // SDPA: + auto sdpa = makeOP({Q, K, V, attention_mask_to_sdpa}, {{"causal", false}}); + auto res = makeOP({sdpa}); + + model = std::make_shared(OutputVector{res}, params); + manager.register_pass(); + } + + { + // Inputs to PA transformer: + auto max_context_len = makeOP({}, {{"shape", PartialShape{}}, el_type_i32}); + auto block_indices_begins = makeOP({}, {{"shape", PartialShape{DYN}}, el_type_i32}); + auto block_indices = makeOP({}, {{"shape", PartialShape{DYN}}, el_type_i32}); + auto subsequence_begins = makeOP({}, {{"shape", PartialShape{DYN}}, el_type_i32}); + auto 
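// Parameters for the post-transformation graph: SDPAToPagedAttention is
// expected to introduce max_context_len, past_lens, subsequence_begins,
// block_indices and block_indices_begins (all i32) plus per-layer
// key/value cache inputs, and to flatten input_ids/position_ids to 1D.
// A hedged sketch of driving the real pass on a loaded model:
//
//   ov::pass::Manager manager;
//   manager.register_pass<ov::pass::SDPAToPagedAttention>();
//   manager.run_passes(model);
//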
past_lens = makeOP({}, {{"shape", PartialShape{DYN}}, el_type_i32}); + auto value_cache_0 = makeOP({}, {{"shape", PartialShape{DYN, 32, 128}}, el_type_f32}); + auto key_cache_0 = makeOP({}, {{"shape", PartialShape{DYN, 32, 128}}, el_type_f32}); + auto input_ids = makeOP({}, {{"shape", PartialShape{DYN}}, el_type_i64}); + auto position_ids = makeOP({}, {{"shape", PartialShape{DYN}}, el_type_i64}); + auto params = nodes_to_params({max_context_len, + block_indices_begins, + block_indices, + subsequence_begins, + past_lens, + value_cache_0, + key_cache_0, + input_ids, + position_ids}); + + // Inputs pre-processing: + auto max_context_len_i64 = makeOP({max_context_len}, {dest_type_i64}); + auto max_context_len_aligned = makeOP({max_context_len_i64, {1}}, {special_zero_true}); + auto input_ids_aligned = makeOP({input_ids, 1}); + auto position_ids_aligned = makeOP({position_ids, 1}); + + // Embeddings processing: + auto embeddings = Qwen7bChatPA::gen_embeddings(input_ids_aligned); + auto qkv_proj = Qwen7bChatPA::gen_qkv_proj(embeddings); + + // RoPE emb sin/cos init: + auto head_size = shared_ptr(); + auto rope_emb_sin = Qwen7bChatPA::gen_rope_emb_sin(max_context_len_aligned, position_ids_aligned, head_size); + auto rope_emb_cos = Qwen7bChatPA::gen_rope_emb_cos(max_context_len_aligned, position_ids_aligned); + + // rope Q, K: + auto rope_Q = Qwen7bChatPA::gen_rope(QKV::Q, qkv_proj, head_size, rope_emb_sin, rope_emb_cos); + auto rope_K = Qwen7bChatPA::gen_rope(QKV::K, qkv_proj, head_size, rope_emb_sin, rope_emb_cos); + + // Current/past/total Seq lengths calculation: + auto current_seq_len = Qwen7bChatPA::gen_current_len(rope_K); + auto past_seq_len = Qwen7bChatPA::gen_past_len(input_ids_aligned, max_context_len); + auto total_seq_len = Qwen7bChatPA::gen_total_len(current_seq_len, past_seq_len); + + // Q, K, V: + shared_ptr head_size_2; + auto Q = Qwen7bChatPA::gen_Q(total_seq_len, rope_Q); + auto K = Qwen7bChatPA::gen_K(rope_K); + auto V = Qwen7bChatPA::gen_V(qkv_proj, head_size_2); + + // Additional PA arguments: + auto sliding_window = std::make_shared(element::i32, Shape{}, 0); + auto alibi_slopes = std::make_shared(element::f32, Shape{0}); + auto scale = std::make_shared(element::f32, Shape{}, MOCK_VALUE); + + // PagedAttention: + auto pa = std::make_shared(OutputVector{Q, + K, + V, + key_cache_0, + value_cache_0, + past_lens, + subsequence_begins, + block_indices, + block_indices_begins, + scale, + sliding_window, + alibi_slopes, + max_context_len}); + pa->set_out_type(0, element::i64); + auto pa_aligned = Qwen7bChatPA::align_pa_layout(pa, head_size_2); + auto res = makeOP({pa_aligned}); + + model_ref = std::make_shared(OutputVector{res}, params); + } + // TODO: align precisions, check the copying of "fuse_names" attr in SDPAToPagedAttention + // checking the graph structure and names, other checks are temporarily disabled: + comparator.disable(FunctionsComparator::PRECISIONS); + disable_rt_info_check(); +} + +TEST_F(TransformationTestsF, SDPAToPA_TotalSequenceLengthPatternQwen) { + { + // Inputs to SDPA transformer: + auto beam_idx = makeOP({}, {{"shape", PartialShape{DYN}}, el_type_i64}); + auto input_ids = makeOP({}, {{"shape", PartialShape{DYN, DYN}}, el_type_i64}); + ParameterVector params = nodes_to_params({input_ids, beam_idx}); + + // K cache + auto k_cache = Qwen7bChatSDPA::gen_cache(input_ids, beam_idx, "K_cache"); + + // Current/past/total Seq lengths calculation: + auto current_len = Qwen7bChatSDPA::gen_current_len(input_ids); + auto past_len = 
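// This case isolates TotalSequenceLengthPatternQwen: the SDPA-style
// total length below (Gather from ShapeOf(input_ids) plus Gather from
// ShapeOf(k_cache)) is expected to collapse into
// Convert(max_context_len, i64) + Reshape{1}, which is exactly the
// reference graph in the second block.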
Qwen7bChatSDPA::gen_past_len(k_cache); + auto total_len = Qwen7bChatSDPA::gen_total_len(current_len, past_len); + auto result = std::make_shared(total_len); + + // Expected that these Nodes to be created inside SDPAToPagedAttention + auto new_input_ids = std::make_shared(element::i64, PartialShape{DYN}); + auto axis = v0::Constant::create(element::i32, Shape{}, {1}); + auto aligned_input_ids = std::make_shared(new_input_ids, axis); + + input_ids->output(0).replace(aligned_input_ids); + auto max_context_len = std::make_shared(element::i32, PartialShape{}); + max_context_len->output(0).set_names({"max_context_len"}); + auto position_ids = std::make_shared(element::i64, PartialShape{DYN}); + position_ids->output(0).set_names({"position_ids"}); + + params.push_back(max_context_len); + params.push_back(new_input_ids); + + // Model and Transformations: + model = std::make_shared(ResultVector{result}, params); + manager.register_pass(aligned_input_ids, max_context_len, position_ids); + manager.register_pass(max_context_len); + } + + { + // Inputs to PA transformer: + auto max_context_len = makeOP({}, {{"shape", PartialShape{}}, el_type_i32}); + auto params = nodes_to_params({max_context_len}); + + // Inputs pre-processing: + auto max_context_len_i64 = makeOP({max_context_len}, {dest_type_i64}); + auto max_context_len_aligned = makeOP({max_context_len_i64, {1}}, {special_zero_true}); + + auto result = std::make_shared(max_context_len_aligned); + model_ref = std::make_shared(ResultVector{result}, params); + } + // TODO: align precisions, check the copying of "fuse_names" attr in SDPAToPagedAttention + // checking the graph structure and names, other checks are temporarily disabled: + comparator.disable(FunctionsComparator::PRECISIONS); + disable_result_friendly_names_check(); + disable_rt_info_check(); +} diff --git a/src/core/include/openvino/op/fake_convert.hpp b/src/core/include/openvino/op/fake_convert.hpp index c3eaa43b98a51b..16ef7a0337c15b 100644 --- a/src/core/include/openvino/op/fake_convert.hpp +++ b/src/core/include/openvino/op/fake_convert.hpp @@ -68,6 +68,7 @@ class OPENVINO_API FakeConvert : public Op { bool has_evaluate() const override; std::string get_destination_type() const; + void set_destination_type(ov::element::Type destination_type); const ov::element::Type& get_destination_element_type() const; private: diff --git a/src/core/include/openvino/pass/backward_graph_rewrite.hpp b/src/core/include/openvino/pass/backward_graph_rewrite.hpp index 7e7f6f118efe8d..cb7f24a76272bb 100644 --- a/src/core/include/openvino/pass/backward_graph_rewrite.hpp +++ b/src/core/include/openvino/pass/backward_graph_rewrite.hpp @@ -11,7 +11,7 @@ namespace ov { namespace pass { class OPENVINO_API BackwardGraphRewrite : public GraphRewrite { public: - OPENVINO_RTTI("ov::pass::BackwardGraphRewrite"); + OPENVINO_GRAPH_REWRITE_RTTI("ov::pass::BackwardGraphRewrite"); BackwardGraphRewrite() = default; diff --git a/src/core/include/openvino/pass/graph_rewrite.hpp b/src/core/include/openvino/pass/graph_rewrite.hpp index ec8e1339912513..4628875fef8fd9 100644 --- a/src/core/include/openvino/pass/graph_rewrite.hpp +++ b/src/core/include/openvino/pass/graph_rewrite.hpp @@ -8,8 +8,19 @@ #include #include +#include "openvino/core/rtti.hpp" #include "openvino/pass/matcher_pass.hpp" +#define _OPENVINO_GRAPH_REWRITE_RTTI_WITH_TYPE(TYPE_NAME) _OPENVINO_GRAPH_REWRITE_RTTI_WITH_TYPE_VERSION(TYPE_NAME, "0") + +#define _OPENVINO_GRAPH_REWRITE_RTTI_WITH_TYPE_VERSION(TYPE_NAME, VERSION_NAME) \ + 
_OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT(TYPE_NAME, VERSION_NAME, ::ov::pass::GraphRewrite) + +#define OPENVINO_GRAPH_REWRITE_RTTI(...) \ + _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR_2(__VA_ARGS__, \ + _OPENVINO_GRAPH_REWRITE_RTTI_WITH_TYPE_VERSION, \ + _OPENVINO_GRAPH_REWRITE_RTTI_WITH_TYPE)(__VA_ARGS__)) + namespace ov { namespace pass { /// \brief GraphRewrite is a container for MatcherPasses that allows to run them on Function @@ -80,7 +91,7 @@ class OPENVINO_API GraphRewrite : public ModelPass { /// /// class ov::pass::LinFusions: public ov::pass::GraphRewrite { /// public: - /// OPENVINO_RTTI("LinFusion"); + /// OPENVINO_GRAPH_REWRITE_RTTI("LinFusion"); /// Fusions() { /// add_matcher(); /// add_matcher(); diff --git a/src/core/include/openvino/pass/sdpa_to_paged_attention.hpp b/src/core/include/openvino/pass/sdpa_to_paged_attention.hpp index 74aeacb0719cee..d52e78dbd6a489 100644 --- a/src/core/include/openvino/pass/sdpa_to_paged_attention.hpp +++ b/src/core/include/openvino/pass/sdpa_to_paged_attention.hpp @@ -19,7 +19,7 @@ class OPENVINO_API SDPAToPagedAttention : public ModelPass { public: OPENVINO_MODEL_PASS_RTTI("SDPAToPagedAttention"); - SDPAToPagedAttention(bool use_block_indices_inputs = false, bool use_score_outputs = false); + explicit SDPAToPagedAttention(bool use_block_indices_inputs = false, bool use_score_outputs = false); bool run_on_model(const std::shared_ptr& model) override; private: diff --git a/src/core/src/op/fake_convert.cpp b/src/core/src/op/fake_convert.cpp index 5b3c8f8d8e9938..517674402ef872 100644 --- a/src/core/src/op/fake_convert.cpp +++ b/src/core/src/op/fake_convert.cpp @@ -79,6 +79,10 @@ std::string FakeConvert::get_destination_type() const { return m_destination_type.get_type_name(); } +void FakeConvert::set_destination_type(ov::element::Type destination_type) { + m_destination_type = destination_type; +} + const ov::element::Type& FakeConvert::get_destination_element_type() const { return m_destination_type; } diff --git a/src/core/src/pass/graph_rewrite.cpp b/src/core/src/pass/graph_rewrite.cpp index 029f572189f829..f8a1f1e723d7a7 100644 --- a/src/core/src/pass/graph_rewrite.cpp +++ b/src/core/src/pass/graph_rewrite.cpp @@ -253,6 +253,8 @@ void ov::pass::GraphRewrite::set_pass_config(const std::shared_ptr& // For example: // // class ExampleGraphRewrite: public pass::GraphRewrite { + // public: + // OPENVINO_GRAPH_REWRITE_RTTI("ExampleGraphRewrite"); // ExampleGraphRewrite() { // add_mather(); // add_mather(); diff --git a/src/core/src/pass/manager.cpp b/src/core/src/pass/manager.cpp index a6f1fc287e221c..b084ec4dc38e09 100644 --- a/src/core/src/pass/manager.cpp +++ b/src/core/src/pass/manager.cpp @@ -5,6 +5,7 @@ #include "openvino/pass/manager.hpp" #include +#include #include #include #include diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp index 872e4539eda8df..e6fc744bb5ef4f 100644 --- a/src/core/src/pass/sdpa_to_paged_attention.cpp +++ b/src/core/src/pass/sdpa_to_paged_attention.cpp @@ -81,15 +81,12 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptrset_partial_shape(PartialShape{-1}); + auto input_ids_target_inputs = input_ids_node->get_output_target_inputs(0); auto unsqueezed_input_ids = std::make_shared(input_ids_node, v0::Constant::create(element::i32, Shape{}, {1})); - replace_node(input_ids_node, unsqueezed_input_ids); - - auto cur_seq_len = std::make_shared(std::make_shared(unsqueezed_input_ids), - v0::Constant::create(element::i64, Shape{}, 
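// Note the rewiring strategy: the consumers of input_ids are recorded
// before the Unsqueeze is created and re-pointed one by one with
// replace_source_output, presumably so the Unsqueeze's own input is not
// redirected back to itself the way a blanket replace_node would; the
// eagerly-built cur_seq_len/prev_max_seq_len subgraph removed here is
// superseded by the PrevSequenceLengthPattern matcher registered further
// down.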
{1}), - v0::Constant::create(element::i64, Shape{}, {0})); - auto prev_max_seq_len = - std::make_shared(max_context_len, std::make_shared(cur_seq_len, element::i32)); + for (const auto& target : input_ids_target_inputs) { + target.replace_source_output(unsqueezed_input_ids); + } ParameterVector kv_parameters; ParameterVector parameters_to_remove; @@ -106,15 +103,15 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptrset_partial_shape(PartialShape{-1}); position_ids->validate_and_infer_types(); } + auto position_ids_target_inputs = position_ids->get_output_target_inputs(0); auto unsqueezed_position_ids = std::make_shared(position_ids, v0::Constant::create(element::i32, Shape{}, {1})); - replace_node(position_ids, unsqueezed_position_ids); + for (const auto& target : position_ids_target_inputs) { + target.replace_source_output(unsqueezed_position_ids); + } int layer_index = 0; - auto batch_dim = - std::make_shared(position_ids); // it is not always required, so will be disposed if not needed - ov::pass::Manager manager("SDPA to PA"); manager.set_per_pass_validation(false); manager.register_pass(kv_parameters, @@ -127,9 +124,12 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr(prev_max_seq_len, batch_dim); + + manager.register_pass(unsqueezed_input_ids, max_context_len, position_ids); manager.register_pass(max_context_len); - manager.register_pass(unsqueezed_position_ids->output(0)); + manager.register_pass(max_context_len); + manager.register_pass(unsqueezed_position_ids); + manager.register_pass(unsqueezed_position_ids); manager.run_passes(model); { diff --git a/src/core/template_extension/CMakeLists.txt b/src/core/template_extension/CMakeLists.txt index aa8030e78d7171..3cfcfcd058ff94 100644 --- a/src/core/template_extension/CMakeLists.txt +++ b/src/core/template_extension/CMakeLists.txt @@ -8,12 +8,15 @@ set(CMAKE_CXX_STANDARD 11) set(TARGET_NAME "openvino_template_extension") # The OpenVINO installed from PyPI can be used to find OpenVINO_DIR -find_package(Python3 REQUIRED) -execute_process( - COMMAND ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')" - OUTPUT_VARIABLE OpenVINO_DIR_PY - ERROR_QUIET -) +if(NOT CMAKE_CROSSCOMPILING) + find_package(Python3 QUIET COMPONENTS Interpreter) + if(Python3_Interpreter_FOUND) + execute_process( + COMMAND ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')" + OUTPUT_VARIABLE OpenVINO_DIR_PY + ERROR_QUIET) + endif() +endif() find_package(OpenVINO REQUIRED PATHS "${OpenVINO_DIR_PY}") diff --git a/src/core/tests/graph_rewrite.cpp b/src/core/tests/graph_rewrite.cpp index 20955f5a5d6b1f..c47b6d5a473666 100644 --- a/src/core/tests/graph_rewrite.cpp +++ b/src/core/tests/graph_rewrite.cpp @@ -58,7 +58,7 @@ class GatherNodesPass : public ov::pass::MatcherPass { class Anchor : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("Anchor"); + OPENVINO_GRAPH_REWRITE_RTTI("Anchor"); Anchor() : GraphRewrite() {} }; diff --git a/src/core/tests/pass_config.cpp b/src/core/tests/pass_config.cpp index 053cb2b62aff32..56d9b2fedf8197 100644 --- a/src/core/tests/pass_config.cpp +++ b/src/core/tests/pass_config.cpp @@ -66,7 +66,7 @@ class TestModelPass : public pass::ModelPass { class TestGraphRewritePass : public pass::GraphRewrite { public: - OPENVINO_RTTI("TestGraphRewritePass"); + OPENVINO_GRAPH_REWRITE_RTTI("TestGraphRewritePass"); TestGraphRewritePass() { add_matcher(); add_matcher(); @@ -284,7 +284,7 @@ class 
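// Every GraphRewrite subclass in the tests now declares its RTTI through
// OPENVINO_GRAPH_REWRITE_RTTI, which expands to the usual OPENVINO_RTTI
// machinery with ::ov::pass::GraphRewrite pre-filled as the parent:
//
//   class MyFusions : public ov::pass::GraphRewrite {
//   public:
//       OPENVINO_GRAPH_REWRITE_RTTI("MyFusions");
//   };
//
// (MyFusions is a hypothetical name used only for illustration.)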
TestNestedMatcher : public ov::pass::MatcherPass { class TestNestedGraphRewrite : public pass::GraphRewrite { public: - OPENVINO_RTTI("TestNestedGraphRewrite"); + OPENVINO_GRAPH_REWRITE_RTTI("TestNestedGraphRewrite"); TestNestedGraphRewrite() { add_matcher(); } diff --git a/src/core/tests/pattern.cpp b/src/core/tests/pattern.cpp index 982e59b55f0f97..5bb961e57db1c2 100644 --- a/src/core/tests/pattern.cpp +++ b/src/core/tests/pattern.cpp @@ -82,6 +82,8 @@ static std::shared_ptr construct_mean_graph() { class TestGraphRewrite : public ov::pass::GraphRewrite { public: + OPENVINO_GRAPH_REWRITE_RTTI("TestGraphRewrite"); + void construct_multiply_by_one() { // pattern #1 : a * 1 = a auto iconst1 = construct_constant_node(1); diff --git a/src/frontends/onnx/tests/__init__.py b/src/frontends/onnx/tests/__init__.py index ef8cebfa361e3f..fdf1295dfd1dbe 100644 --- a/src/frontends/onnx/tests/__init__.py +++ b/src/frontends/onnx/tests/__init__.py @@ -147,7 +147,7 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): skip_dynamic_model = pytest.mark.skip(reason="CPU plug-in can't load a model with dynamic output shapes via legacy API") # ONNX 1.14 -xfail_issue_119896 = xfail_test(reason="Unsupported element type: FLOAT8") +xfail_issue_119896 = xfail_test(reason="Unsupported element type: FLOAT8", strict=False) xfail_issue_119900 = xfail_test(reason="While validating ONNX node '': " "half_pixel_symmetric - this type of coordinate transformation mode " "is not supported. Choose one of the following modes: " diff --git a/src/frontends/paddle/src/default_opset.hpp b/src/frontends/paddle/src/default_opset.hpp index c3eed5b5653c92..a5dc374964d485 100644 --- a/src/frontends/paddle/src/default_opset.hpp +++ b/src/frontends/paddle/src/default_opset.hpp @@ -2,13 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "openvino/opsets/opset9.hpp" +#include "openvino/opsets/opset14.hpp" namespace ov { namespace frontend { namespace paddle { namespace op { -namespace default_opset = ov::opset9; +namespace default_opset = ov::opset14; } // namespace op } // namespace paddle diff --git a/src/frontends/paddle/src/op/elu.cpp b/src/frontends/paddle/src/op/elu.cpp new file mode 100644 index 00000000000000..c51a2af6f9f176 --- /dev/null +++ b/src/frontends/paddle/src/op/elu.cpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "default_opset.hpp" +#include "openvino/frontend/paddle/node_context.hpp" +#include "openvino/frontend/paddle/visibility.hpp" + +namespace ov { +namespace frontend { +namespace paddle { +namespace op { +NamedOutputs elu(const NodeContext& node) { + auto data = node.get_input("X"); + auto alpha = node.get_attribute("alpha", 1.0); + const auto& elu_node = std::make_shared(data, alpha); + return node.default_single_output_mapping({elu_node}, {"Out"}); +} + +} // namespace op +} // namespace paddle +} // namespace frontend +} // namespace ov diff --git a/src/frontends/paddle/src/op/expand_v2.cpp b/src/frontends/paddle/src/op/expand_v2.cpp index d79e49db286c13..ea174efa3a9920 100644 --- a/src/frontends/paddle/src/op/expand_v2.cpp +++ b/src/frontends/paddle/src/op/expand_v2.cpp @@ -19,8 +19,16 @@ NamedOutputs expand_v2(const NodeContext& node) { auto inputs = node.get_ng_inputs("expand_shapes_tensor"); ov::NodeVector node_vec; for (auto& input : inputs) { + if (input.get_partial_shape().rank().get_length() == 0) { + // should unsqueeze the input with non-shape. 
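// That is, a rank-0 (scalar) element of expand_shapes_tensor gets
// unsqueezed along axis 0 into a rank-1 tensor first, since every input
// to the Concat assembling the target shape must be one-dimensional.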
+ auto unsqueeze_scalar = default_opset::Constant::create(ov::element::i32, {}, {0}); + input = std::make_shared(input, unsqueeze_scalar); + } + PADDLE_OP_CHECK(node, + input.get_partial_shape().rank().get_length() == 1, + "the rank of conv input must == 1"); auto cast = std::make_shared(input, element::i32); - node_vec.push_back(cast); + node_vec.emplace_back(cast); } shape_expected_node = std::make_shared(node_vec, 0); } else { diff --git a/src/frontends/paddle/src/op/eye.cpp b/src/frontends/paddle/src/op/eye.cpp new file mode 100644 index 00000000000000..3734d6fab44817 --- /dev/null +++ b/src/frontends/paddle/src/op/eye.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "default_opset.hpp" +#include "openvino/frontend/paddle/node_context.hpp" + +namespace ov { +namespace frontend { +namespace paddle { +namespace op { +NamedOutputs eye(const NodeContext& node) { + auto row = node.get_attribute("num_rows"); + auto col = node.get_attribute("num_columns", row); + auto dtype = node.get_attribute("dtype", ov::element::f32); + + const auto& row_node = std::make_shared(ov::element::i64, Shape{}, (row)); + const auto& col_node = std::make_shared(ov::element::i64, Shape{}, (col)); + const auto& diagonal_index_node = std::make_shared(ov::element::i32, Shape{}, (0)); + + std::shared_ptr out_node; + if (dtype == ov::element::i32 || dtype == ov::element::i64) { + out_node = std::make_shared(row_node, col_node, diagonal_index_node, dtype); + } else { + const auto& eye_node = + std::make_shared(row_node, col_node, diagonal_index_node, ov::element::i32); + out_node = std::make_shared(eye_node, dtype); + } + + return node.default_single_output_mapping({out_node}, {"Out"}); +} + +} // namespace op +} // namespace paddle +} // namespace frontend +} // namespace ov diff --git a/src/frontends/paddle/src/op/fill_constant.cpp b/src/frontends/paddle/src/op/fill_constant.cpp index b066fdfbe7a0c7..4a674b61d10c86 100644 --- a/src/frontends/paddle/src/op/fill_constant.cpp +++ b/src/frontends/paddle/src/op/fill_constant.cpp @@ -29,6 +29,10 @@ NamedOutputs fill_constant(const NodeContext& node) { PADDLE_OP_CHECK(node, false, "fill_constant only supports i32, f32, i64"); } + if (shape.empty()) { + shape.emplace_back(1); + } + PADDLE_OP_CHECK(node, shape.size() > 0 || node.has_input("ShapeTensor") || node.has_input("ShapeTensorList"), "fill_constant shape not set"); diff --git a/src/frontends/paddle/src/op/interp.cpp b/src/frontends/paddle/src/op/interp.cpp index e7b317f2888a83..5ab551dc3bdde2 100644 --- a/src/frontends/paddle/src/op/interp.cpp +++ b/src/frontends/paddle/src/op/interp.cpp @@ -4,6 +4,7 @@ #include "default_opset.hpp" #include "openvino/frontend/paddle/node_context.hpp" +#include "openvino/opsets/opset4.hpp" namespace ov { namespace frontend { @@ -147,8 +148,9 @@ static NamedOutputs interpolate(const NodeContext& node, attrs.pads_begin = {0, 0, 0, 0}; attrs.pads_end = {0, 0, 0, 0}; - return node.default_single_output_mapping({std::make_shared(x, target_spatial_shape, scales, attrs)}, - {"Out"}); + return node.default_single_output_mapping( + {std::make_shared(x, target_spatial_shape, scales, attrs)}, + {"Out"}); } NamedOutputs linear_interp_v2(const NodeContext& node) { diff --git a/src/frontends/paddle/src/op/reduce_ops.hpp b/src/frontends/paddle/src/op/reduce_ops.hpp index 2b595160420282..954d1de425c924 100644 --- a/src/frontends/paddle/src/op/reduce_ops.hpp +++ b/src/frontends/paddle/src/op/reduce_ops.hpp @@ -31,6 +31,10 @@ 
NamedOutputs reduce_ops(const NodeContext& node) { dims = node.get_attribute>("dim"); } + std::transform(dims.begin(), dims.end(), dims.begin(), [&input_rank](int64_t value) { + return value >= 0 ? value : value + input_rank; + }); + int64_t axis_size = static_cast(dims.size()); reduce_all = reduce_all || (axis_size == input_rank || axis_size == 0); diff --git a/src/frontends/paddle/src/op_table.cpp b/src/frontends/paddle/src/op_table.cpp index 769492eb13d1b8..e092f16095abe0 100644 --- a/src/frontends/paddle/src/op_table.cpp +++ b/src/frontends/paddle/src/op_table.cpp @@ -39,9 +39,11 @@ OP_CONVERTER(elementwise_sub); OP_CONVERTER(equal); OP_CONVERTER(greater_equal); OP_CONVERTER(not_equal); +OP_CONVERTER(elu); OP_CONVERTER(embedding); OP_CONVERTER(exp); OP_CONVERTER(expand_v2); +OP_CONVERTER(eye); OP_CONVERTER(flip); OP_CONVERTER(flatten_contiguous_range); OP_CONVERTER(floor); @@ -173,9 +175,11 @@ std::map get_supported_ops() { {"elementwise_sub", op::elementwise_sub}, {"dropout", op::dropout}, {"elementwise_pow", op::elementwise_pow}, + {"elu", op::elu}, {"equal", op::equal}, {"exp", op::exp}, {"expand_v2", op::expand_v2}, + {"eye", op::eye}, {"fill_any_like", op::fill_any_like}, {"fill_constant", op::fill_constant}, {"fill_constant_batch_size_like", op::fill_constant_batch_size_like}, diff --git a/src/frontends/paddle/tests/op_fuzzy.cpp b/src/frontends/paddle/tests/op_fuzzy.cpp index 99357a3a336d01..53ea7852604376 100644 --- a/src/frontends/paddle/tests/op_fuzzy.cpp +++ b/src/frontends/paddle/tests/op_fuzzy.cpp @@ -188,6 +188,7 @@ static const std::vector models{ std::string("elementwise_floordiv_int64_2/elementwise_floordiv_int64_2.pdmodel"), std::string("elementwise_floordiv_int64_3/elementwise_floordiv_int64_3.pdmodel"), std::string("elementwise_mul_bool1/elementwise_mul_bool1.pdmodel"), + std::string("elu/elu.pdmodel"), std::string("embedding_0/embedding_0.pdmodel"), std::string("embedding_sparse/embedding_sparse.pdmodel"), std::string("embedding_none_weight/embedding_none_weight.pdmodel"), @@ -201,6 +202,9 @@ static const std::vector models{ std::string("expand_v2_tensor_list/expand_v2_tensor_list.pdmodel"), std::string("expand_v2_tensor_list2/expand_v2_tensor_list2.pdmodel"), std::string("exp_test_float32/exp_test_float32.pdmodel"), + std::string("eye/eye.pdmodel"), + std::string("eye_int32/eye_int32.pdmodel"), + std::string("eye_int64/eye_int64.pdmodel"), std::string("flip_1/flip_1.pdmodel"), std::string("flip_2/flip_2.pdmodel"), std::string("flip_3/flip_3.pdmodel"), diff --git a/src/frontends/paddle/tests/test_models/gen_scripts/generate_elu.py b/src/frontends/paddle/tests/test_models/gen_scripts/generate_elu.py new file mode 100644 index 00000000000000..4dc67b2051222b --- /dev/null +++ b/src/frontends/paddle/tests/test_models/gen_scripts/generate_elu.py @@ -0,0 +1,44 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# +# relu6 paddle model generator +# +import numpy as np +from save_model import saveModel +import paddle +import sys + + +def elu(name: str, x, alpha=None, data_type='float32'): + paddle.enable_static() + + with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): + node_x = paddle.static.data(name='x', shape=x.shape, dtype=data_type) + + if paddle.__version__ >= '2.0.0': + out = paddle.nn.functional.elu(node_x, alpha, name='elu') + else: + out = paddle.fluid.layers.elu(node_x, alpha, name='elu') + cpu = paddle.static.cpu_places(1) + exe = paddle.static.Executor(cpu[0]) + # startup program will call 
initializer to initialize the parameters. + exe.run(paddle.static.default_startup_program()) + + outs = exe.run( + feed={'x': x}, + fetch_list=[out]) + + saveModel(name, exe, feed_vars=[node_x], fetchlist=[out], + inputs=[x], outputs=[outs[0]], target_dir=sys.argv[1]) + + return outs[0] + + +def main(): + data_type = 'float32' + data = np.random.randn(2, 3, 4).astype('float32') + elu("elu", data) + +if __name__ == "__main__": + main() diff --git a/src/frontends/paddle/tests/test_models/gen_scripts/generate_eye.py b/src/frontends/paddle/tests/test_models/gen_scripts/generate_eye.py new file mode 100644 index 00000000000000..9b1a4f668c3ab2 --- /dev/null +++ b/src/frontends/paddle/tests/test_models/gen_scripts/generate_eye.py @@ -0,0 +1,41 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# +# fill_const paddle model generator +# +import numpy as np +from save_model import saveModel +import paddle +import sys + + +def eye(name : str, rows, cols = None, dtype = None): + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): + if paddle.__version__ >= '2.0.0': + x1 = paddle.eye(num_rows=rows, num_columns=cols, dtype=dtype, name='fill') + x2 = paddle.eye(num_rows=rows, num_columns=cols, dtype=dtype, name='fill') + else: + x1 = paddle.fluid.layers.eye(num_rows=rows, num_columns=cols, dtype=dtype, name='fill_constant') + x2 = paddle.fluid.layers.eye(num_rows=rows, num_columns=cols, dtype=dtype, name='fill_constant') + out = paddle.add(x1, x2) + cpu = paddle.static.cpu_places(1) + exe = paddle.static.Executor(cpu[0]) + # startup program will call initializer to initialize the parameters. + exe.run(paddle.static.default_startup_program()) + + outs = exe.run( + fetch_list=[out]) + + saveModel(name, exe, feed_vars=[], fetchlist=[out], inputs=[], outputs=[outs[0]], target_dir=sys.argv[1]) + + return outs[0] + +def main(): + eye("eye", 3) + eye("eye_int32", 2, 3, "int32") + eye("eye_int64", 2, 3, "int64") + +if __name__ == "__main__": + main() diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp index f8bd16173b8fce..29c8bfddbd1ca4 100644 --- a/src/inference/src/os/lin/lin_system_conf.cpp +++ b/src/inference/src/os/lin/lin_system_conf.cpp @@ -23,76 +23,107 @@ CPU::CPU() { std::vector> system_info_table; std::vector node_info_table; - auto get_cache_info_linux = [&]() { + constexpr int cache_info_mode = 1; + constexpr int freq_info_mode = 2; + + auto get_info_linux = [&](int mode) { int cpu_index = 0; - int cache_index = 0; - int cache_files = 3; + int file_index = 0; + int max_files = 3; - std::vector one_info(cache_files); + std::string one_info; - while (1) { - for (int n = 0; n < cache_files; n++) { - cache_index = (n == 0) ? 
n : n + 1; - - std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + "/cache/index" + - std::to_string(cache_index) + "/shared_cpu_list"); - if (!cache_file.is_open()) { - cache_index = -1; - break; - } - std::string cache_info; - std::getline(cache_file, cache_info); - one_info[n] = std::move(cache_info); - } + std::string::size_type pos = 0; + std::string::size_type endpos = 0; + std::string sub_str; - if (cache_index == -1) { - if (cpu_index == 0) { - return -1; - } else { - return 0; - } - } else { - system_info_table.push_back(one_info); - cpu_index++; - } + int core_1; + int core_2; + + system_info_table.clear(); + + std::ifstream possible_file("/sys/devices/system/cpu/possible"); + std::string possible_info; + + if (possible_file.is_open()) { + std::getline(possible_file, possible_info); + } else { + return -1; } - return 0; - }; + if ((endpos = possible_info.find('-', pos)) != std::string::npos) { + sub_str = possible_info.substr(pos, endpos - pos); + core_1 = std::stoi(sub_str); + sub_str = possible_info.substr(endpos + 1); + core_2 = std::stoi(sub_str); + system_info_table.resize(core_2 + 1, std::vector(max_files, "")); + } else { + return -1; + } - auto get_freq_info_linux = [&]() { - int cpu_index = 0; - int cache_index = 0; + std::ifstream online_file("/sys/devices/system/cpu/online"); + std::string online_info; - std::vector file_name = {"/topology/core_cpus_list", - "/topology/physical_package_id", - "/cpufreq/cpuinfo_max_freq"}; - int num_of_files = file_name.size(); - std::vector one_info(num_of_files); + if (online_file.is_open()) { + std::getline(online_file, online_info); + } else { + system_info_table.clear(); + return -1; + } while (1) { - for (int n = 0; n < num_of_files; n++) { - cache_index = n; + if ((endpos = online_info.find('-', pos)) != std::string::npos) { + sub_str = online_info.substr(pos, endpos - pos); + core_1 = std::stoi(sub_str); + sub_str = online_info.substr(endpos + 1); + core_2 = std::stoi(sub_str); - std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + file_name[n]); - if (!cache_file.is_open()) { - cache_index = -1; - break; + for (cpu_index = core_1; cpu_index <= core_2; cpu_index++) { + if (mode == cache_info_mode) { + for (int n = 0; n < max_files; n++) { + file_index = (n == 0) ? 
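// The "(n == 0) ? n : n + 1" mapping skips cache/index1, which on x86 is
// typically the L1 instruction cache: n == 0 reads index0 (L1d), while
// n == 1 and n == 2 read index2 (L2) and index3 (L3).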
n : n + 1; + one_info.clear(); + + std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + + "/cache/index" + std::to_string(file_index) + "/shared_cpu_list"); + if (cache_file.is_open()) { + std::getline(cache_file, one_info); + } else { + if ((cpu_index == core_1) && (n == 0)) { + system_info_table.clear(); + return -1; + } + } + system_info_table[cpu_index][n] = std::move(one_info); + } + } else { + std::vector file_name = {"/topology/core_cpus_list", + "/topology/physical_package_id", + "/cpufreq/cpuinfo_max_freq"}; + + for (int n = 0; n < max_files; n++) { + one_info.clear(); + + std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + + file_name[n]); + if (cache_file.is_open()) { + std::getline(cache_file, one_info); + } else { + if ((cpu_index == core_1) && (n == 2)) { + system_info_table.clear(); + return -1; + } + } + system_info_table[cpu_index][n] = std::move(one_info); + } + } } - std::string cache_info; - std::getline(cache_file, cache_info); - one_info[n] = std::move(cache_info); } - if (cache_index == -1) { - if (cpu_index == 0) { - return -1; - } else { - return 0; - } + if ((pos = online_info.find(',', endpos)) != std::string::npos) { + pos++; } else { - system_info_table.push_back(one_info); - cpu_index++; + break; } } @@ -190,20 +221,23 @@ CPU::CPU() { } else { _processors = valid_cpu_mapping_table.size(); _cpu_mapping_table.swap(valid_cpu_mapping_table); - update_valid_processor_linux(std::move(phy_core_list), - _numa_nodes, - _cores, - _proc_type_table, - _cpu_mapping_table); + { + std::lock_guard lock{_cpu_mutex}; + update_valid_processor_linux(std::move(phy_core_list), + _numa_nodes, + _cores, + _proc_type_table, + _cpu_mapping_table); + } return 0; } }; get_node_info_linux(); - if (!get_cache_info_linux()) { + if (!get_info_linux(cache_info_mode)) { parse_cache_info_linux(system_info_table, - node_info_table, + std::move(node_info_table), _processors, _numa_nodes, _sockets, @@ -215,9 +249,9 @@ CPU::CPU() { if ((_proc_type_table.size() == 0) || ((_proc_type_table[0][MAIN_CORE_PROC] == 0) && (_proc_type_table[0][ALL_PROC] > 0) && (_proc_type_table[0][ALL_PROC] != _proc_type_table[0][EFFICIENT_CORE_PROC]))) { - if (!get_freq_info_linux()) { + if (!get_info_linux(freq_info_mode)) { parse_freq_info_linux(system_info_table, - node_info_table, + std::move(node_info_table), _processors, _numa_nodes, _sockets, @@ -471,56 +505,73 @@ void parse_cache_info_linux(const std::vector> system_i const std::vector line_value_0({0, 0, 0, 0, -1, -1}); - for (int n = 0; n < _processors; n++) { - if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { - std::string::size_type pos = 0; - std::string::size_type endpos = 0; - std::string sub_str; - - int core_1; - int core_2; + std::vector offline_list; + int info_index = 0; - if (0 == _sockets) { - _proc_type_table.push_back(line_value_0); - } else { - _proc_type_table.push_back(_proc_type_table[0]); - _proc_type_table[0] = line_value_0; - } - - while (1) { - if ((endpos = system_info_table[n][2].find('-', pos)) != std::string::npos) { - sub_str = system_info_table[n][2].substr(pos, endpos - pos); - core_1 = std::stoi(sub_str); - sub_str = system_info_table[n][2].substr(endpos + 1); - core_2 = std::stoi(sub_str); + for (int n = 0; n < _processors; n++) { + if ((system_info_table[n][2].size() > 0) || (system_info_table[n][1].size() > 0)) { + info_index = system_info_table[n][2].size() > 0 ? 
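// Offline CPUs are no longer fatal to parsing: the shared L3 list
// (column 2) is preferred and the L2 list (column 1) is the fallback,
// while processors with neither entry are collected into offline_list
// and erased from _cpu_mapping_table once parsing finishes.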
2 : 1; + if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { + std::string::size_type pos = 0; + std::string::size_type endpos = 0; + std::string sub_str; + + int core_1; + int core_2; + + if (0 == _sockets) { + _proc_type_table.push_back(line_value_0); + } else { + _proc_type_table.push_back(_proc_type_table[0]); + _proc_type_table[0] = line_value_0; + } - for (int m = core_1; m <= core_2; m++) { - _cpu_mapping_table[m][CPU_MAP_SOCKET_ID] = _sockets; - _cpu_mapping_table[m][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[m][CPU_MAP_SOCKET_ID]; - update_proc_map_info(m); + while (1) { + if ((endpos = system_info_table[n][info_index].find('-', pos)) != std::string::npos) { + sub_str = system_info_table[n][info_index].substr(pos, endpos - pos); + core_1 = std::stoi(sub_str); + sub_str = system_info_table[n][info_index].substr(endpos + 1); + core_2 = std::stoi(sub_str); + + if ((info_index == 1) && (core_2 - core_1 == 1)) { + offline_list.push_back(n); + break; + } + for (int m = core_1; m <= core_2; m++) { + _cpu_mapping_table[m][CPU_MAP_SOCKET_ID] = _sockets; + _cpu_mapping_table[m][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[m][CPU_MAP_SOCKET_ID]; + update_proc_map_info(m); + if (_processors == 0) { + return; + }; + } + } else if (pos != std::string::npos) { + sub_str = system_info_table[n][info_index].substr(pos); + core_1 = std::stoi(sub_str); + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = _sockets; + _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + update_proc_map_info(core_1); if (_processors == 0) { return; }; + endpos = pos; } - } else if (pos != std::string::npos) { - sub_str = system_info_table[n][2].substr(pos); - core_1 = std::stoi(sub_str); - _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = _sockets; - _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - update_proc_map_info(core_1); - if (_processors == 0) { - return; - }; - endpos = pos; - } - if ((pos = system_info_table[n][2].find(',', endpos)) != std::string::npos) { - pos++; - } else { - break; + if ((pos = system_info_table[n][2].find(',', endpos)) != std::string::npos) { + pos++; + } else { + break; + } + } + _sockets++; + if (_proc_type_table[0][ALL_PROC] == 0) { + _proc_type_table.erase(_proc_type_table.begin()); + _sockets--; } } - _sockets++; + } else { + offline_list.push_back(n); } } @@ -540,6 +591,11 @@ void parse_cache_info_linux(const std::vector> system_i _numa_nodes = node_info_table.size(); parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table); } + + for (size_t n = 0; n < offline_list.size(); n++) { + _cpu_mapping_table.erase(_cpu_mapping_table.begin() + offline_list[n] - n); + _processors--; + } }; void get_cpu_mapping_from_cores(const int _processors, @@ -615,7 +671,6 @@ void parse_freq_info_linux(const std::vector> system_in std::vector>& _cpu_mapping_table) { int freq_max = 0; bool ecore_enabled = false; - bool ht_enabled = false; _processors = system_info_table.size(); _numa_nodes = 0; @@ -625,6 +680,8 @@ void parse_freq_info_linux(const std::vector> system_in std::vector line_value_0(PROC_TYPE_TABLE_SIZE, 0); + std::vector offline_list; + auto clean_up_output = [&]() { _processors = 0; _cores = 0; @@ -636,65 +693,68 @@ void parse_freq_info_linux(const std::vector> system_in }; for (int n = 0; n < _processors; n++) { - if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { - std::string::size_type pos = 0; - std::string::size_type endpos1 = 0; - 
std::string::size_type endpos2 = 0; - std::string sub_str; - - int core_1 = 0; - int core_2 = 0; - - if (((endpos1 = system_info_table[n][0].find(',', pos)) != std::string::npos) || - ((endpos2 = system_info_table[n][0].find('-', pos)) != std::string::npos)) { - endpos1 = (endpos1 != std::string::npos) ? endpos1 : endpos2; - sub_str = system_info_table[n][0].substr(pos, endpos1 - pos); - core_1 = std::stoi(sub_str); - sub_str = system_info_table[n][0].substr(endpos1 + 1); - core_2 = std::stoi(sub_str); - if ((core_1 != n) && (core_2 != n)) { - clean_up_output(); - return; - } - - _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; - _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); - _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; - _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC; - _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; + if (system_info_table[n][2].size() > 0) { + if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { + std::string::size_type pos = 0; + std::string::size_type endpos1 = 0; + std::string::size_type endpos2 = 0; + std::string sub_str; + + int core_1 = 0; + int core_2 = 0; + + if (((endpos1 = system_info_table[n][0].find(',', pos)) != std::string::npos) || + ((endpos2 = system_info_table[n][0].find('-', pos)) != std::string::npos)) { + endpos1 = (endpos1 != std::string::npos) ? endpos1 : endpos2; + sub_str = system_info_table[n][0].substr(pos, endpos1 - pos); + core_1 = std::stoi(sub_str); + sub_str = system_info_table[n][0].substr(endpos1 + 1); + core_2 = std::stoi(sub_str); + if ((core_1 != n) && (core_2 != n)) { + clean_up_output(); + return; + } - _cpu_mapping_table[core_2][CPU_MAP_PROCESSOR_ID] = core_2; - _cpu_mapping_table[core_2][CPU_MAP_SOCKET_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_2][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_2][CPU_MAP_CORE_ID] = _cpu_mapping_table[core_1][CPU_MAP_CORE_ID]; - _cpu_mapping_table[core_2][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; - _cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID]; + _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); + _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; + _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC; + _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; + + _cpu_mapping_table[core_2][CPU_MAP_PROCESSOR_ID] = core_2; + _cpu_mapping_table[core_2][CPU_MAP_SOCKET_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_2][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_2][CPU_MAP_CORE_ID] = _cpu_mapping_table[core_1][CPU_MAP_CORE_ID]; + _cpu_mapping_table[core_2][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; + _cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID]; + + int core_freq = std::stoi(system_info_table[core_1][2]); + freq_max = std::max(core_freq, freq_max); + } else if (system_info_table[n][0].size() > 0) { + core_1 = std::stoi(system_info_table[n][0]); - ht_enabled = true; - int core_freq = std::stoi(system_info_table[core_1][2]); - freq_max = 
std::max(core_freq, freq_max); - } else if (system_info_table[n][0].size() > 0) { - core_1 = std::stoi(system_info_table[n][0]); + _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); + _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; - _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; - _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); - _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; + int core_freq = std::stoi(system_info_table[core_1][2]); + if ((0 == freq_max) || (core_freq >= freq_max * 0.97)) { + freq_max = std::max(core_freq, freq_max); + _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; + } else { + _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC; + ecore_enabled = true; + } - int core_freq = std::stoi(system_info_table[core_1][2]); - if (((0 == freq_max) || (core_freq >= freq_max * 0.95)) && (!ht_enabled)) { - freq_max = std::max(core_freq, freq_max); - _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; - } else { - _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC; - ecore_enabled = true; + _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; } - - _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; + _sockets = std::max(_sockets, _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]); + _cores++; } - _sockets = std::max(_sockets, _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]); - _cores++; + } else { + offline_list.push_back(n); } } @@ -733,6 +793,11 @@ void parse_freq_info_linux(const std::vector> system_in _numa_nodes = node_info_table.size(); parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table); } + + for (size_t n = 0; n < offline_list.size(); n++) { + _cpu_mapping_table.erase(_cpu_mapping_table.begin() + offline_list[n] - n); + _processors--; + } }; void update_valid_processor_linux(const std::vector phy_core_list, diff --git a/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp b/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp index 8679090b9ae491..9ea43bd0604296 100644 --- a/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp +++ b/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp @@ -385,6 +385,188 @@ LinuxCpuMapTestCase cache_1sockets_96cores = { {"0-95"}, }, }; +LinuxCpuMapTestCase cache_2sockets_56cores_hyperthreading = { + 110, + 2, + 2, + 56, + {{110, 56, 0, 54, -1, -1}, {54, 28, 0, 26, 0, 0}, {56, 28, 0, 28, 1, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, + {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, + {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, + {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1}, + {11, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {12, 0, 0, 11, HYPER_THREADING_PROC, 11, -1}, + {13, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {14, 0, 0, 13, HYPER_THREADING_PROC, 13, -1}, + {15, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {16, 0, 0, 15, HYPER_THREADING_PROC, 15, -1}, + {17, 0, 0, 16, 
HYPER_THREADING_PROC, 16, -1}, {18, 0, 0, 17, HYPER_THREADING_PROC, 17, -1}, + {19, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {21, 0, 0, 19, HYPER_THREADING_PROC, 19, -1}, + {22, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {23, 0, 0, 21, HYPER_THREADING_PROC, 21, -1}, + {24, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {25, 0, 0, 23, HYPER_THREADING_PROC, 23, -1}, + {26, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {27, 0, 0, 25, HYPER_THREADING_PROC, 25, -1}, + {28, 1, 1, 28, HYPER_THREADING_PROC, 28, -1}, {29, 1, 1, 29, HYPER_THREADING_PROC, 29, -1}, + {30, 1, 1, 30, HYPER_THREADING_PROC, 30, -1}, {31, 1, 1, 31, HYPER_THREADING_PROC, 31, -1}, + {32, 1, 1, 32, HYPER_THREADING_PROC, 32, -1}, {33, 1, 1, 33, HYPER_THREADING_PROC, 33, -1}, + {34, 1, 1, 34, HYPER_THREADING_PROC, 34, -1}, {35, 1, 1, 35, HYPER_THREADING_PROC, 35, -1}, + {36, 1, 1, 36, HYPER_THREADING_PROC, 36, -1}, {37, 1, 1, 37, HYPER_THREADING_PROC, 37, -1}, + {38, 1, 1, 38, HYPER_THREADING_PROC, 38, -1}, {39, 1, 1, 39, HYPER_THREADING_PROC, 39, -1}, + {40, 1, 1, 40, HYPER_THREADING_PROC, 40, -1}, {41, 1, 1, 41, HYPER_THREADING_PROC, 41, -1}, + {42, 1, 1, 42, HYPER_THREADING_PROC, 42, -1}, {43, 1, 1, 43, HYPER_THREADING_PROC, 43, -1}, + {44, 1, 1, 44, HYPER_THREADING_PROC, 44, -1}, {45, 1, 1, 45, HYPER_THREADING_PROC, 45, -1}, + {46, 1, 1, 46, HYPER_THREADING_PROC, 46, -1}, {47, 1, 1, 47, HYPER_THREADING_PROC, 47, -1}, + {48, 1, 1, 48, HYPER_THREADING_PROC, 48, -1}, {49, 1, 1, 49, HYPER_THREADING_PROC, 49, -1}, + {50, 1, 1, 50, HYPER_THREADING_PROC, 50, -1}, {51, 1, 1, 51, HYPER_THREADING_PROC, 51, -1}, + {52, 1, 1, 52, HYPER_THREADING_PROC, 52, -1}, {53, 1, 1, 53, HYPER_THREADING_PROC, 53, -1}, + {54, 1, 1, 54, HYPER_THREADING_PROC, 54, -1}, {55, 1, 1, 55, HYPER_THREADING_PROC, 55, -1}, + {56, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {57, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {58, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {59, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {60, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {61, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {62, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {63, 0, 0, 7, MAIN_CORE_PROC, 7, -1}, + {64, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {65, 0, 0, 9, MAIN_CORE_PROC, 9, -1}, + {66, 0, 0, 26, MAIN_CORE_PROC, 26, -1}, {67, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, + {68, 0, 0, 11, MAIN_CORE_PROC, 11, -1}, {69, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, + {70, 0, 0, 13, MAIN_CORE_PROC, 13, -1}, {71, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, + {72, 0, 0, 15, MAIN_CORE_PROC, 15, -1}, {73, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, + {74, 0, 0, 17, MAIN_CORE_PROC, 17, -1}, {75, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, + {76, 0, 0, 27, MAIN_CORE_PROC, 27, -1}, {77, 0, 0, 19, MAIN_CORE_PROC, 19, -1}, + {78, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {79, 0, 0, 21, MAIN_CORE_PROC, 21, -1}, + {80, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {81, 0, 0, 23, MAIN_CORE_PROC, 23, -1}, + {82, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {83, 0, 0, 25, MAIN_CORE_PROC, 25, -1}, + {84, 1, 1, 28, MAIN_CORE_PROC, 28, -1}, {85, 1, 1, 29, MAIN_CORE_PROC, 29, -1}, + {86, 1, 1, 30, MAIN_CORE_PROC, 30, -1}, {87, 1, 1, 31, MAIN_CORE_PROC, 31, -1}, + {88, 1, 1, 32, MAIN_CORE_PROC, 32, -1}, {89, 1, 1, 33, MAIN_CORE_PROC, 33, -1}, + {90, 1, 1, 34, MAIN_CORE_PROC, 34, -1}, {91, 1, 1, 35, MAIN_CORE_PROC, 35, -1}, + {92, 1, 1, 36, MAIN_CORE_PROC, 36, -1}, {93, 1, 1, 37, MAIN_CORE_PROC, 37, -1}, + {94, 1, 1, 38, MAIN_CORE_PROC, 38, -1}, {95, 1, 1, 39, MAIN_CORE_PROC, 39, -1}, + {96, 1, 1, 40, MAIN_CORE_PROC, 40, -1}, {97, 1, 1, 41, MAIN_CORE_PROC, 41, -1}, + {98, 1, 1, 42, MAIN_CORE_PROC, 42, -1}, {99, 1, 1, 43, MAIN_CORE_PROC, 43, -1}, + 
{100, 1, 1, 44, MAIN_CORE_PROC, 44, -1}, {101, 1, 1, 45, MAIN_CORE_PROC, 45, -1}, + {102, 1, 1, 46, MAIN_CORE_PROC, 46, -1}, {103, 1, 1, 47, MAIN_CORE_PROC, 47, -1}, + {104, 1, 1, 48, MAIN_CORE_PROC, 48, -1}, {105, 1, 1, 49, MAIN_CORE_PROC, 49, -1}, + {106, 1, 1, 50, MAIN_CORE_PROC, 50, -1}, {107, 1, 1, 51, MAIN_CORE_PROC, 51, -1}, + {108, 1, 1, 52, MAIN_CORE_PROC, 52, -1}, {109, 1, 1, 53, MAIN_CORE_PROC, 53, -1}, + {110, 1, 1, 54, MAIN_CORE_PROC, 54, -1}, {111, 1, 1, 55, MAIN_CORE_PROC, 55, -1}, + }, + { + {"0,56", "0,56", "0-9,11-19,21-27,56-83"}, + {"1,57", "1,57", "0-9,11-19,21-27,56-83"}, + {"2,58", "2,58", "0-9,11-19,21-27,56-83"}, + {"3,59", "3,59", "0-9,11-19,21-27,56-83"}, + {"4,60", "4,60", "0-9,11-19,21-27,56-83"}, + {"5,61", "5,61", "0-9,11-19,21-27,56-83"}, + {"6,62", "6,62", "0-9,11-19,21-27,56-83"}, + {"7,63", "7,63", "0-9,11-19,21-27,56-83"}, + {"8,64", "8,64", "0-9,11-19,21-27,56-83"}, + {"9,65", "9,65", "0-9,11-19,21-27,56-83"}, + {"", "", ""}, + {"11,67", "11,67", "0-9,11-19,21-27,56-83"}, + {"12,68", "12,68", "0-9,11-19,21-27,56-83"}, + {"13,69", "13,69", "0-9,11-19,21-27,56-83"}, + {"14,70", "14,70", "0-9,11-19,21-27,56-83"}, + {"15,71", "15,71", "0-9,11-19,21-27,56-83"}, + {"16,72", "16,72", "0-9,11-19,21-27,56-83"}, + {"17,73", "17,73", "0-9,11-19,21-27,56-83"}, + {"18,74", "18,74", "0-9,11-19,21-27,56-83"}, + {"19,75", "19,75", "0-9,11-19,21-27,56-83"}, + {"", "", ""}, + {"21,77", "21,77", "0-9,11-19,21-27,56-83"}, + {"22,78", "22,78", "0-9,11-19,21-27,56-83"}, + {"23,79", "23,79", "0-9,11-19,21-27,56-83"}, + {"24,80", "24,80", "0-9,11-19,21-27,56-83"}, + {"25,81", "25,81", "0-9,11-19,21-27,56-83"}, + {"26,82", "26,82", "0-9,11-19,21-27,56-83"}, + {"27,83", "27,83", "0-9,11-19,21-27,56-83"}, + {"28,84", "28,84", "28-55,84-111"}, + {"29,85", "29,85", "28-55,84-111"}, + {"30,86", "30,86", "28-55,84-111"}, + {"31,87", "31,87", "28-55,84-111"}, + {"32,88", "32,88", "28-55,84-111"}, + {"33,89", "33,89", "28-55,84-111"}, + {"34,90", "34,90", "28-55,84-111"}, + {"35,91", "35,91", "28-55,84-111"}, + {"36,92", "36,92", "28-55,84-111"}, + {"37,93", "37,93", "28-55,84-111"}, + {"38,94", "38,94", "28-55,84-111"}, + {"39,95", "39,95", "28-55,84-111"}, + {"40,96", "40,96", "28-55,84-111"}, + {"41,97", "41,97", "28-55,84-111"}, + {"42,98", "42,98", "28-55,84-111"}, + {"43,99", "43,99", "28-55,84-111"}, + {"44,100", "44,100", "28-55,84-111"}, + {"45,101", "45,101", "28-55,84-111"}, + {"46,102", "46,102", "28-55,84-111"}, + {"47,103", "47,103", "28-55,84-111"}, + {"48,104", "48,104", "28-55,84-111"}, + {"49,105", "49,105", "28-55,84-111"}, + {"50,106", "50,106", "28-55,84-111"}, + {"51,107", "51,107", "28-55,84-111"}, + {"52,108", "52,108", "28-55,84-111"}, + {"53,109", "53,109", "28-55,84-111"}, + {"54,110", "54,110", "28-55,84-111"}, + {"55,111", "55,111", "28-55,84-111"}, + {"0,56", "0,56", "0-9,11-19,21-27,56-83"}, + {"1,57", "1,57", "0-9,11-19,21-27,56-83"}, + {"2,58", "2,58", "0-9,11-19,21-27,56-83"}, + {"3,59", "3,59", "0-9,11-19,21-27,56-83"}, + {"4,60", "4,60", "0-9,11-19,21-27,56-83"}, + {"5,61", "5,61", "0-9,11-19,21-27,56-83"}, + {"6,62", "6,62", "0-9,11-19,21-27,56-83"}, + {"7,63", "7,63", "0-9,11-19,21-27,56-83"}, + {"8,64", "8,64", "0-9,11-19,21-27,56-83"}, + {"9,65", "9,65", "0-9,11-19,21-27,56-83"}, + {"66", "66", "0-9,11-19,21-27,56-83"}, + {"11,67", "11,67", "0-9,11-19,21-27,56-83"}, + {"12,68", "12,68", "0-9,11-19,21-27,56-83"}, + {"13,69", "13,69", "0-9,11-19,21-27,56-83"}, + {"14,70", "14,70", "0-9,11-19,21-27,56-83"}, + {"15,71", "15,71", 
"0-9,11-19,21-27,56-83"}, + {"16,72", "16,72", "0-9,11-19,21-27,56-83"}, + {"17,73", "17,73", "0-9,11-19,21-27,56-83"}, + {"18,74", "18,74", "0-9,11-19,21-27,56-83"}, + {"19,75", "19,75", "0-9,11-19,21-27,56-83"}, + {"76", "76", "0-9,11-19,21-27,56-83"}, + {"21,77", "21,77", "0-9,11-19,21-27,56-83"}, + {"22,78", "22,78", "0-9,11-19,21-27,56-83"}, + {"23,79", "23,79", "0-9,11-19,21-27,56-83"}, + {"24,80", "24,80", "0-9,11-19,21-27,56-83"}, + {"25,81", "25,81", "0-9,11-19,21-27,56-83"}, + {"26,82", "26,82", "0-9,11-19,21-27,56-83"}, + {"27,83", "27,83", "0-9,11-19,21-27,56-83"}, + {"28,84", "28,84", "28-55,84-111"}, + {"29,85", "29,85", "28-55,84-111"}, + {"30,86", "30,86", "28-55,84-111"}, + {"31,87", "31,87", "28-55,84-111"}, + {"32,88", "32,88", "28-55,84-111"}, + {"33,89", "33,89", "28-55,84-111"}, + {"34,90", "34,90", "28-55,84-111"}, + {"35,91", "35,91", "28-55,84-111"}, + {"36,92", "36,92", "28-55,84-111"}, + {"37,93", "37,93", "28-55,84-111"}, + {"38,94", "38,94", "28-55,84-111"}, + {"39,95", "39,95", "28-55,84-111"}, + {"40,96", "40,96", "28-55,84-111"}, + {"41,97", "41,97", "28-55,84-111"}, + {"42,98", "42,98", "28-55,84-111"}, + {"43,99", "43,99", "28-55,84-111"}, + {"44,100", "44,100", "28-55,84-111"}, + {"45,101", "45,101", "28-55,84-111"}, + {"46,102", "46,102", "28-55,84-111"}, + {"47,103", "47,103", "28-55,84-111"}, + {"48,104", "48,104", "28-55,84-111"}, + {"49,105", "49,105", "28-55,84-111"}, + {"50,106", "50,106", "28-55,84-111"}, + {"51,107", "51,107", "28-55,84-111"}, + {"52,108", "52,108", "28-55,84-111"}, + {"53,109", "53,109", "28-55,84-111"}, + {"54,110", "54,110", "28-55,84-111"}, + {"55,111", "55,111", "28-55,84-111"}, + }, + { + {"0-9,11-19,21-27,56-83"}, + {"28-55,84-111"}, + }, +}; LinuxCpuMapTestCase cache_2sockets_48cores_hyperthreading = { 96, 2, @@ -1005,6 +1187,36 @@ LinuxCpuMapTestCase cache_2sockets_20cores_hyperthreading_1 = { }, {}, }; +LinuxCpuMapTestCase cache_1sockets_16cores_hyperthreading = { + 20, + 1, + 1, + 14, + {{20, 6, 8, 6, 0, 0}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, {3, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, + {4, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {5, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {12, 0, 0, 6, EFFICIENT_CORE_PROC, 6, -1}, {13, 0, 0, 7, EFFICIENT_CORE_PROC, 6, -1}, + {14, 0, 0, 8, EFFICIENT_CORE_PROC, 6, -1}, {15, 0, 0, 9, EFFICIENT_CORE_PROC, 6, -1}, + {16, 0, 0, 10, EFFICIENT_CORE_PROC, 7, -1}, {17, 0, 0, 11, EFFICIENT_CORE_PROC, 7, -1}, + {18, 0, 0, 12, EFFICIENT_CORE_PROC, 7, -1}, {19, 0, 0, 13, EFFICIENT_CORE_PROC, 7, -1}, + }, + { + {"0,5", "0,5", "0-19"}, {"1-2", "1-2", "0-19"}, {"1-2", "1-2", "0-19"}, {"3-4", "3-4", "0-19"}, + {"3-4", "3-4", "0-19"}, {"0,5", "0,5", "0-19"}, {"6-7", "6-7", "0-19"}, {"6-7", "6-7", "0-19"}, + {"8-9", "8-9", "0-19"}, {"8-9", "8-9", "0-19"}, {"10-11", "10-11", "0-19"}, {"10-11", "10-11", "0-19"}, + {"12", "12-15", "0-19"}, {"13", "12-15", "0-19"}, {"14", "12-15", "0-19"}, {"15", "12-15", "0-19"}, + {"16", "16-19", "0-19"}, {"17", "16-19", "0-19"}, {"18", "16-19", "0-19"}, {"19", "16-19", "0-19"}, + {"20", "20-21", ""}, {"21", "20-21", ""}, + }, + { + {"0-21"}, + }, +}; LinuxCpuMapTestCase cache_1sockets_14cores_hyperthreading = { 20, 1, @@ -1135,6 +1347,36 @@ 
LinuxCpuMapTestCase cache_1sockets_8cores_hyperthreading = { }, {{"0-11"}}, }; +LinuxCpuMapTestCase cache_1sockets_8cores_hyperthreading_1 = { + 8, + 1, + 1, + 8, + {{8, 4, 4, 0, 0, 0}}, + { + {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {2, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, + {3, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {4, 0, 0, 4, EFFICIENT_CORE_PROC, 4, -1}, + {5, 0, 0, 5, EFFICIENT_CORE_PROC, 4, -1}, + {6, 0, 0, 6, EFFICIENT_CORE_PROC, 4, -1}, + {7, 0, 0, 7, EFFICIENT_CORE_PROC, 4, -1}, + }, + { + {"0", "0", "0-3"}, + {"1", "1", "0-3"}, + {"2", "2", "0-3"}, + {"3", "3", "0-3"}, + {"4", "4-7", ""}, + {"5", "4-7", ""}, + {"6", "4-7", ""}, + {"7", "4-7", ""}, + }, + { + {"0-7"}, + }, +}; LinuxCpuMapTestCase cache_1sockets_6cores_hyperthreading = { 12, 1, @@ -1220,6 +1462,7 @@ INSTANTIATE_TEST_SUITE_P(CPUMap, LinuxCpuMapCacheParserTests, testing::Values(cache_2sockets_104cores_hyperthreading, cache_1sockets_96cores, + cache_2sockets_56cores_hyperthreading, cache_2sockets_48cores_hyperthreading, cache_2sockets_48cores_hyperthreading_1, cache_2sockets_24cores_hyperthreading, @@ -1229,10 +1472,12 @@ INSTANTIATE_TEST_SUITE_P(CPUMap, cache_2sockets_48cores_2, cache_2sockets_20cores_hyperthreading, cache_2sockets_20cores_hyperthreading_1, + cache_1sockets_16cores_hyperthreading, cache_1sockets_14cores_hyperthreading, cache_1sockets_14cores_hyperthreading_1, cache_1sockets_10cores_hyperthreading, cache_1sockets_8cores_hyperthreading, + cache_1sockets_8cores_hyperthreading_1, cache_1sockets_6cores_hyperthreading, cache_1sockets_4cores, cache_VM_cache_0)); diff --git a/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp b/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp index 04ab617961b953..8ccdfad011d19c 100644 --- a/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp +++ b/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp @@ -258,6 +258,188 @@ LinuxCpuMapTestCase freq_2sockets_112cores_hyperthreading = { }, // param[in]: The CPU frequency information table of this simulated platform {{"0-55,112-167"}, {"56-111,168-223"}}, // param[in]: The numa node information table of this simulated platform }; +LinuxCpuMapTestCase freq_2sockets_56cores_hyperthreading = { + 110, + 2, + 2, + 56, + {{110, 56, 0, 54, -1, -1}, {54, 28, 0, 26, 0, 0}, {56, 28, 0, 28, 1, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, + {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, + {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, + {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1}, + {11, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {12, 0, 0, 11, HYPER_THREADING_PROC, 11, -1}, + {13, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {14, 0, 0, 13, HYPER_THREADING_PROC, 13, -1}, + {15, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {16, 0, 0, 15, HYPER_THREADING_PROC, 15, -1}, + {17, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {18, 0, 0, 17, HYPER_THREADING_PROC, 17, -1}, + {19, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {21, 0, 0, 19, HYPER_THREADING_PROC, 19, -1}, + {22, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {23, 0, 0, 21, HYPER_THREADING_PROC, 21, -1}, + {24, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {25, 0, 0, 23, HYPER_THREADING_PROC, 23, -1}, + {26, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {27, 0, 0, 25, HYPER_THREADING_PROC, 25, 
-1}, + {28, 1, 1, 26, HYPER_THREADING_PROC, 26, -1}, {29, 1, 1, 27, HYPER_THREADING_PROC, 27, -1}, + {30, 1, 1, 28, HYPER_THREADING_PROC, 28, -1}, {31, 1, 1, 29, HYPER_THREADING_PROC, 29, -1}, + {32, 1, 1, 30, HYPER_THREADING_PROC, 30, -1}, {33, 1, 1, 31, HYPER_THREADING_PROC, 31, -1}, + {34, 1, 1, 32, HYPER_THREADING_PROC, 32, -1}, {35, 1, 1, 33, HYPER_THREADING_PROC, 33, -1}, + {36, 1, 1, 34, HYPER_THREADING_PROC, 34, -1}, {37, 1, 1, 35, HYPER_THREADING_PROC, 35, -1}, + {38, 1, 1, 36, HYPER_THREADING_PROC, 36, -1}, {39, 1, 1, 37, HYPER_THREADING_PROC, 37, -1}, + {40, 1, 1, 38, HYPER_THREADING_PROC, 38, -1}, {41, 1, 1, 39, HYPER_THREADING_PROC, 39, -1}, + {42, 1, 1, 40, HYPER_THREADING_PROC, 40, -1}, {43, 1, 1, 41, HYPER_THREADING_PROC, 41, -1}, + {44, 1, 1, 42, HYPER_THREADING_PROC, 42, -1}, {45, 1, 1, 43, HYPER_THREADING_PROC, 43, -1}, + {46, 1, 1, 44, HYPER_THREADING_PROC, 44, -1}, {47, 1, 1, 45, HYPER_THREADING_PROC, 45, -1}, + {48, 1, 1, 46, HYPER_THREADING_PROC, 46, -1}, {49, 1, 1, 47, HYPER_THREADING_PROC, 47, -1}, + {50, 1, 1, 48, HYPER_THREADING_PROC, 48, -1}, {51, 1, 1, 49, HYPER_THREADING_PROC, 49, -1}, + {52, 1, 1, 50, HYPER_THREADING_PROC, 50, -1}, {53, 1, 1, 51, HYPER_THREADING_PROC, 51, -1}, + {54, 1, 1, 52, HYPER_THREADING_PROC, 52, -1}, {55, 1, 1, 53, HYPER_THREADING_PROC, 53, -1}, + {56, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {57, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {58, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {59, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {60, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {61, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {62, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {63, 0, 0, 7, MAIN_CORE_PROC, 7, -1}, + {64, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {65, 0, 0, 9, MAIN_CORE_PROC, 9, -1}, + {66, 0, 0, 54, MAIN_CORE_PROC, 54, -1}, {67, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, + {68, 0, 0, 11, MAIN_CORE_PROC, 11, -1}, {69, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, + {70, 0, 0, 13, MAIN_CORE_PROC, 13, -1}, {71, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, + {72, 0, 0, 15, MAIN_CORE_PROC, 15, -1}, {73, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, + {74, 0, 0, 17, MAIN_CORE_PROC, 17, -1}, {75, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, + {76, 0, 0, 55, MAIN_CORE_PROC, 55, -1}, {77, 0, 0, 19, MAIN_CORE_PROC, 19, -1}, + {78, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {79, 0, 0, 21, MAIN_CORE_PROC, 21, -1}, + {80, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {81, 0, 0, 23, MAIN_CORE_PROC, 23, -1}, + {82, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {83, 0, 0, 25, MAIN_CORE_PROC, 25, -1}, + {84, 1, 1, 26, MAIN_CORE_PROC, 26, -1}, {85, 1, 1, 27, MAIN_CORE_PROC, 27, -1}, + {86, 1, 1, 28, MAIN_CORE_PROC, 28, -1}, {87, 1, 1, 29, MAIN_CORE_PROC, 29, -1}, + {88, 1, 1, 30, MAIN_CORE_PROC, 30, -1}, {89, 1, 1, 31, MAIN_CORE_PROC, 31, -1}, + {90, 1, 1, 32, MAIN_CORE_PROC, 32, -1}, {91, 1, 1, 33, MAIN_CORE_PROC, 33, -1}, + {92, 1, 1, 34, MAIN_CORE_PROC, 34, -1}, {93, 1, 1, 35, MAIN_CORE_PROC, 35, -1}, + {94, 1, 1, 36, MAIN_CORE_PROC, 36, -1}, {95, 1, 1, 37, MAIN_CORE_PROC, 37, -1}, + {96, 1, 1, 38, MAIN_CORE_PROC, 38, -1}, {97, 1, 1, 39, MAIN_CORE_PROC, 39, -1}, + {98, 1, 1, 40, MAIN_CORE_PROC, 40, -1}, {99, 1, 1, 41, MAIN_CORE_PROC, 41, -1}, + {100, 1, 1, 42, MAIN_CORE_PROC, 42, -1}, {101, 1, 1, 43, MAIN_CORE_PROC, 43, -1}, + {102, 1, 1, 44, MAIN_CORE_PROC, 44, -1}, {103, 1, 1, 45, MAIN_CORE_PROC, 45, -1}, + {104, 1, 1, 46, MAIN_CORE_PROC, 46, -1}, {105, 1, 1, 47, MAIN_CORE_PROC, 47, -1}, + {106, 1, 1, 48, MAIN_CORE_PROC, 48, -1}, {107, 1, 1, 49, MAIN_CORE_PROC, 49, -1}, + {108, 1, 1, 50, MAIN_CORE_PROC, 50, -1}, {109, 1, 1, 51, MAIN_CORE_PROC, 51, -1}, + {110, 1, 1, 52, 
MAIN_CORE_PROC, 52, -1}, {111, 1, 1, 53, MAIN_CORE_PROC, 53, -1}, + }, + { + {"0,56", "0", "3500000"}, + {"1,57", "0", "3500000"}, + {"2,58", "0", "3500000"}, + {"3,59", "0", "3500000"}, + {"4,60", "0", "3500000"}, + {"5,61", "0", "3500000"}, + {"6,62", "0", "3500000"}, + {"7,63", "0", "3500000"}, + {"8,64", "0", "3500000"}, + {"9,65", "0", "3500000"}, + {"", "", ""}, + {"11,67", "0", "3500000"}, + {"12,68", "0", "3500000"}, + {"13,69", "0", "3500000"}, + {"14,70", "0", "3500000"}, + {"15,71", "0", "3500000"}, + {"16,72", "0", "3500000"}, + {"17,73", "0", "3500000"}, + {"18,74", "0", "3500000"}, + {"19,75", "0", "3500000"}, + {"", "", ""}, + {"21,77", "0", "3500000"}, + {"22,78", "0", "3500000"}, + {"23,79", "0", "3500000"}, + {"24,80", "0", "3500000"}, + {"25,81", "0", "3500000"}, + {"26,82", "0", "3500000"}, + {"27,83", "0", "3500000"}, + {"28,84", "1", "3500000"}, + {"29,85", "1", "3500000"}, + {"30,86", "1", "3500000"}, + {"31,87", "1", "3500000"}, + {"32,88", "1", "3500000"}, + {"33,89", "1", "3500000"}, + {"34,90", "1", "3500000"}, + {"35,91", "1", "3500000"}, + {"36,92", "1", "3500000"}, + {"37,93", "1", "3500000"}, + {"38,94", "1", "3500000"}, + {"39,95", "1", "3500000"}, + {"40,96", "1", "3500000"}, + {"41,97", "1", "3500000"}, + {"42,98", "1", "3500000"}, + {"43,99", "1", "3500000"}, + {"44,100", "1", "3500000"}, + {"45,101", "1", "3500000"}, + {"46,102", "1", "3500000"}, + {"47,103", "1", "3500000"}, + {"48,104", "1", "3500000"}, + {"49,105", "1", "3500000"}, + {"50,106", "1", "3500000"}, + {"51,107", "1", "3500000"}, + {"52,108", "1", "3500000"}, + {"53,109", "1", "3500000"}, + {"54,110", "1", "3500000"}, + {"55,111", "1", "3500000"}, + {"0,56", "0", "3500000"}, + {"1,57", "0", "3500000"}, + {"2,58", "0", "3500000"}, + {"3,59", "0", "3500000"}, + {"4,60", "0", "3500000"}, + {"5,61", "0", "3500000"}, + {"6,62", "0", "3500000"}, + {"7,63", "0", "3500000"}, + {"8,64", "0", "3500000"}, + {"9,65", "0", "3500000"}, + {"66", "0", "3500000"}, + {"11,67", "0", "3500000"}, + {"12,68", "0", "3500000"}, + {"13,69", "0", "3500000"}, + {"14,70", "0", "3500000"}, + {"15,71", "0", "3500000"}, + {"16,72", "0", "3500000"}, + {"17,73", "0", "3500000"}, + {"18,74", "0", "3500000"}, + {"19,75", "0", "3500000"}, + {"76", "0", "3500000"}, + {"21,77", "0", "3500000"}, + {"22,78", "0", "3500000"}, + {"23,79", "0", "3500000"}, + {"24,80", "0", "3500000"}, + {"25,81", "0", "3500000"}, + {"26,82", "0", "3500000"}, + {"27,83", "0", "3500000"}, + {"28,84", "1", "3500000"}, + {"29,85", "1", "3500000"}, + {"30,86", "1", "3500000"}, + {"31,87", "1", "3500000"}, + {"32,88", "1", "3500000"}, + {"33,89", "1", "3500000"}, + {"34,90", "1", "3500000"}, + {"35,91", "1", "3500000"}, + {"36,92", "1", "3500000"}, + {"37,93", "1", "3500000"}, + {"38,94", "1", "3500000"}, + {"39,95", "1", "3500000"}, + {"40,96", "1", "3500000"}, + {"41,97", "1", "3500000"}, + {"42,98", "1", "3500000"}, + {"43,99", "1", "3500000"}, + {"44,100", "1", "3500000"}, + {"45,101", "1", "3500000"}, + {"46,102", "1", "3500000"}, + {"47,103", "1", "3500000"}, + {"48,104", "1", "3500000"}, + {"49,105", "1", "3500000"}, + {"50,106", "1", "3500000"}, + {"51,107", "1", "3500000"}, + {"52,108", "1", "3500000"}, + {"53,109", "1", "3500000"}, + {"54,110", "1", "3500000"}, + {"55,111", "1", "3500000"}, + }, + { + {"0-9,11-19,21-27,56-83"}, + {"28-55,84-111"}, + }, +}; LinuxCpuMapTestCase freq_2sockets_48cores_hyperthreading = { 96, 2, @@ -987,6 +1169,7 @@ TEST_P(LinuxCpuMapFreqParserTests, LinuxFreq) {} INSTANTIATE_TEST_SUITE_P(CPUMap, 
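+                         // The 56-core fixture feeds freq-mode triplets of
+                         // {core_cpus_list, physical_package_id, cpuinfo_max_freq};
+                         // its empty rows model offline cpu10/cpu20, leaving 110 of
+                         // 112 logical CPUs in the expected mapping table.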
LinuxCpuMapFreqParserTests, testing::Values(freq_2sockets_112cores_hyperthreading, + freq_2sockets_56cores_hyperthreading, freq_2sockets_48cores_hyperthreading, freq_2sockets_48cores_hyperthreading_1, freq_2sockets_24cores_hyperthreading, diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 67c538bd78341a..865ec1f692b762 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -144,6 +144,7 @@ static const TypeToNameMap& get_type_to_name_tbl() { {"Loop", Type::TensorIterator}, {"ReadValue", Type::MemoryInput}, // for construction from name ctor, arbitrary name is used {"Assign", Type::MemoryOutput}, // for construction from layer ctor + {"ReadValueWithSubgraph", Type::MemoryInput}, {"Convert", Type::Convert}, {"NV12toRGB", Type::ColorConvert}, {"NV12toBGR", Type::ColorConvert}, diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index 457f8368f734dd..1c5598b6d55e26 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -36,6 +36,8 @@ uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) { case dnnl::memory::data_type::s4: case dnnl::memory::data_type::u4: case dnnl::memory::data_type::f8_e8m0: + case dnnl::memory::data_type::f8_e4m3: + case dnnl::memory::data_type::f8_e5m2: case dnnl::memory::data_type::f4_e2m1: return 1; case dnnl::memory::data_type::undef: @@ -70,6 +72,10 @@ dnnl::memory::data_type DnnlExtensionUtils::ElementTypeToDataType(const ov::elem return memory::data_type::u4; case ov::element::f8e8m0: return memory::data_type::f8_e8m0; + case ov::element::f8e4m3: + return memory::data_type::f8_e4m3; + case ov::element::f8e5m2: + return memory::data_type::f8_e5m2; case ov::element::f4e2m1: return memory::data_type::f4_e2m1; case ov::element::undefined: @@ -106,6 +112,10 @@ ov::element::Type DnnlExtensionUtils::DataTypeToElementType(const dnnl::memory:: return ov::element::u4; case memory::data_type::f8_e8m0: return ov::element::f8e8m0; + case memory::data_type::f8_e4m3: + return ov::element::f8e4m3; + case memory::data_type::f8_e5m2: + return ov::element::f8e5m2; case memory::data_type::f4_e2m1: return ov::element::f4e2m1; case memory::data_type::undef: diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp index 2bfbaa68880aa8..6ad7d758b9ff07 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp @@ -11,13 +11,14 @@ namespace intel_cpu { class jit_uni_vcvtneps2bf16 : public jit_emitter { public: + enum class conversion_mode { default_mode, saturation_mode }; jit_uni_vcvtneps2bf16(dnnl::impl::cpu::x64::jit_generator* host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, - ov::element::Type exec_prc = ov::element::bf16) + ov::element::Type exec_prc = ov::element::bf16, + conversion_mode mode = conversion_mode::default_mode) : jit_emitter(host, host_isa, exec_prc) { - if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16) && - !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) - prepare_table(); + prepare_table(); + mode_ = mode; } size_t get_inputs_num() const override { @@ -25,6 +26,7 @@ class jit_uni_vcvtneps2bf16 : public jit_emitter { } private: + conversion_mode mode_ = conversion_mode::default_mode; void 
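+    // dispatches on the host ISA only; the saturation handling itself
+    // lives in emit_isa():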
emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const override { if (host_isa_ == dnnl::impl::cpu::x64::avx512_core) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -44,6 +46,25 @@ class jit_uni_vcvtneps2bf16 : public jit_emitter { conditional3::type; Vmm in = Vmm(in_vec_idxs[0]); + if (mode_ == conversion_mode::saturation_mode) { + Vmm vmm_temp = Vmm(out_vec_idxs[0]); + + h->uni_vmaxps(vmm_temp, in, table_val("bf16_min")); + h->uni_vminps(vmm_temp, vmm_temp, table_val("bf16_max")); + + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) { + h->vfixupimmps(vmm_temp, in, table_val("selector"), 0); + } else { + Vmm mask = Vmm(aux_vec_idxs[0]); + h->uni_vcmpps(mask, in, in, 0x03); // _CMP_UNORD_Q + h->uni_vblendvps(vmm_temp, vmm_temp, table_val("nan"), mask); + h->uni_vcmpps(mask, in, table_val("inf"), 0x00); // _CMP_EQ_OQ + h->uni_vblendvps(vmm_temp, vmm_temp, table_val("inf"), mask); + h->uni_vcmpps(mask, in, table_val("neg_inf"), 0x00); // _CMP_EQ_OQ + h->uni_vblendvps(vmm_temp, vmm_temp, table_val("neg_inf"), mask); + } + h->uni_vmovups(in, vmm_temp); + } if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) { Ymm out = Ymm(out_vec_idxs[0]); @@ -119,6 +140,11 @@ class jit_uni_vcvtneps2bf16 : public jit_emitter { push_arg_entry_of("rounding", 0x00010000, true); push_arg_entry_of("selector", selector_int32, true); push_arg_entry_of("mask_truncation_word", 0x0000ffff, true); + push_arg_entry_of("bf16_max", 0x7F7F0000, true); + push_arg_entry_of("bf16_min", 0xFF7F0000, true); + push_arg_entry_of("nan", 0x7FC00000, true); + push_arg_entry_of("inf", 0x7F800000, true); + push_arg_entry_of("neg_inf", 0xFF800000, true); } size_t aux_vecs_count() const override { diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index bdb5211009a22a..95de3720bb1e25 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -23,6 +23,7 @@ #include "transformations/cpu_opset/common/op/leaky_relu.hpp" #include "transformations/cpu_opset/common/op/ngram.hpp" #include "transformations/cpu_opset/common/op/power_static.hpp" +#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp" #include "transformations/cpu_opset/common/op/sdpa.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/cpu_opset/x64/op/interaction.hpp" @@ -78,6 +79,7 @@ class TypeRelaxedExtension : public ov::OpExtension> { OP_EXTENSION(ov::intel_cpu::SwishNode) \ OP_EXTENSION(ov::intel_cpu::SDPAWithTransposeReshape) \ OP_EXTENSION(ov::intel_cpu::NgramNode) \ + OP_EXTENSION(ov::intel_cpu::ReadValueWithSubgraph) \ OP_EXTENSION(ov::op::internal::GatherCompressed) \ OP_EXTENSION(ov::op::internal::NonMaxSuppressionIEInternal) \ OP_EXTENSION(ov::op::internal::MulticlassNmsIEInternal) \ diff --git a/src/plugins/intel_cpu/src/graph_dumper.cpp b/src/plugins/intel_cpu/src/graph_dumper.cpp index ffd58fdb162899..3cdd2f389d29f8 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.cpp +++ b/src/plugins/intel_cpu/src/graph_dumper.cpp @@ -357,6 +357,10 @@ void average_counters(const Graph& graph) { * - _.csv * For example: 0_MyModel.csv */ + if (!graph.getGraphContext()) { + DEBUG_LOG("graph.m_context is null. 
Don't dump average_counters."); + return; + } const std::string& path = graph.getConfig().debugCaps.averageCountersPath; diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index fe0df309dc32f1..1cab7ab7d8c60a 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -2935,12 +2935,19 @@ void GraphOptimizer::MatchSdpaKvCache(Graph& graph) { auto memInputNode = std::dynamic_pointer_cast(node); OPENVINO_ASSERT(memInputNode, "MemoryInput node ", node->getName(), " has unexpected dynamic type"); - ov::optional input_shape; - ov::optional input_prc; - + ov::optional> inputShapes; + ov::optional> inputPrcs; if (!node->getParentEdges().empty()) { - input_shape = ov::optional(node->getInputShapeAtPort(0)); - input_prc = ov::optional(node->getOriginalInputPrecisionAtPort(0)); + inputShapes = ov::optional>(std::vector{}); + inputPrcs = ov::optional>(std::vector{}); + + auto& input_shape_vec = *inputShapes; + auto& input_prc_vec = *inputPrcs; + + for (size_t i = 0; i < node->getParentEdges().size(); i++) { + input_shape_vec.push_back(node->getInputShapeAtPort(i)); + input_prc_vec.push_back(node->getOriginalInputPrecisionAtPort(i)); + } } // search for SDPA @@ -2966,8 +2973,8 @@ void GraphOptimizer::MatchSdpaKvCache(Graph& graph) { memInputNode->getOutputShapeAtPort(0), memInputNode->getOriginalOutputPrecisionAtPort(0), graph.getGraphContext(), - input_shape, - input_prc, + inputShapes, + inputPrcs, sdpa); if (!memInputNode->getParentEdges().empty()) { @@ -3064,12 +3071,18 @@ void GraphOptimizer::DropRedundantMemoryOutput(Graph& graph) { auto memInputNode = std::dynamic_pointer_cast(node); OPENVINO_ASSERT(memInputNode, "MemoryInput node ", node->getName(), " has unexpected dynamic type"); - ov::optional inputShape; - ov::optional inputPrc; - + ov::optional> inputShapes; + ov::optional> inputPrcs; if (!node->getParentEdges().empty()) { - inputShape = ov::optional(node->getInputShapeAtPort(0)); - inputPrc = ov::optional(node->getOriginalInputPrecisionAtPort(0)); + inputShapes = ov::optional>(std::vector{}); + inputPrcs = ov::optional>(std::vector{}); + + auto& input_shape_vec = *inputShapes; + auto& input_prc_vec = *inputPrcs; + for (size_t i = 0; i < node->getParentEdges().size(); i++) { + input_shape_vec.push_back(node->getInputShapeAtPort(i)); + input_prc_vec.push_back(node->getOriginalInputPrecisionAtPort(i)); + } } // search for the MemoryOutputNode @@ -3086,6 +3099,10 @@ void GraphOptimizer::DropRedundantMemoryOutput(Graph& graph) { graph.RemoveEdge(memoryOutputNode->getParentEdgeAt(0)); // there are no output edges from MemoryOutput nodes + CPU_GRAPH_OPTIMIZER_SCOPE(DropRedundantMemoryOutput_SubGraph); + auto memInpNd = std::dynamic_pointer_cast(node); + OPENVINO_ASSERT(memInpNd, "MemoryInput node ", node->getName(), " has unexpected dynamic type"); + // now replace the existing MemoryInput with a special type that works without the corresponding MemoryOutput auto memInputSingle = std::make_shared(memInputNode->getId(), memInputNode->getName(), @@ -3093,17 +3110,24 @@ void GraphOptimizer::DropRedundantMemoryOutput(Graph& graph) { memInputNode->getOutputShapeAtPort(0), memInputNode->getOriginalOutputPrecisionAtPort(0), graph.getGraphContext(), - inputShape, - inputPrc); - + inputShapes, + inputPrcs, + memInpNd->getSubGraph()); graph.AddNode(memInputSingle); if (!memInputNode->getParentEdges().empty()) { - auto parentEdge = memInputNode->getParentEdgeAt(0); - auto parent = 
parentEdge->getParent(); - const auto inputNum = parentEdge->getInputNum(); - graph.RemoveEdge(parentEdge); - graph.CreateEdge(parent, memInputSingle, inputNum, 0); + auto parentEdgeNum = memInputNode->getParentEdges().size(); + std::vector parentEdges; + for (size_t i = 0; i < parentEdgeNum; i++) { + auto parentEdge = memInputNode->getParentEdgeAt(i); + auto parent = parentEdge->getParent(); + const auto inputNum = parentEdge->getInputNum(); + parentEdges.push_back(parentEdge); + graph.CreateEdge(parent, memInputSingle, inputNum, parentEdge->getOutputNum()); + } + for (auto parentEdge : parentEdges) { + graph.RemoveEdge(parentEdge); + } } for (auto&& edge : memInputNode->getChildEdgesAtPort(0)) { diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp index 0c8cddd905dc2e..f6aabe376d6eec 100644 --- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp @@ -9,6 +9,7 @@ #include "utils/bfloat16.hpp" #if defined(OPENVINO_ARCH_X86_64) +# include "cpu/x64/jit_avx512_core_fp8cvt.hpp" # include "nodes/kernels/x64/jit_kernel.hpp" #else # include "cpu_memory.h" @@ -27,6 +28,18 @@ using namespace dnnl::impl::utils; using namespace dnnl::impl::cpu::x64; using namespace Xbyak; +enum f8_type { none, f8e4m3, f8e5m2 }; + +template +f8_type get_f8_type() { + if (std::is_same::value || std::is_same::value) { + return f8_type::f8e4m3; + } else if (std::is_same::value || std::is_same::value) { + return f8_type::f8e5m2; + } + return f8_type::none; +} + template void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst); @@ -50,12 +63,14 @@ void convert_vec(jit_generator& gen, const RegExp& src, cons gen.movdqu(gen.xword[dst], f16vec); } +template class jit_convert_array : public jit_kernel { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_convert_array) void generate() override { - constexpr size_t vlen = 8u; - constexpr size_t vlen_log2 = 3; + bool is_fp8 = f8_e4m3_emu_ || f8_e5m2_emu_; + size_t vlen = is_fp8 ? 16u : 8u; + size_t vlen_log2 = is_fp8 ? 
4 : 3; preamble(); @@ -84,17 +99,24 @@ class jit_convert_array : public jit_kernel { auto tail_size = var(); tail_size = size; - tail_size <<= static_cast(std::logb(_src_size)) - 1; - copy(tmp.pointer(), src, tail_size); + tail_size <<= static_cast(std::logb(_src_size)); + copy(tmp.pointer(), src, tail_size); _convert_vec(*this, tmp.pointer(), tmp.pointer()); tail_size = size; - tail_size <<= static_cast(std::logb(_dst_size)) - 1; - copy(dst, tmp.pointer(), tail_size); + tail_size <<= static_cast(std::logb(_dst_size)); + copy(dst, tmp.pointer(), tail_size); }); postamble(); + + if (f8_e4m3_emu_) + f8_e4m3_emu_->prepare_table(); + if (f8_e5m2_emu_) + f8_e5m2_emu_->prepare_table(); + if (uni_vcvtneps2bf16_) + uni_vcvtneps2bf16_->emit_data(); } public: @@ -108,16 +130,37 @@ class jit_convert_array : public jit_kernel { typedef void (*convert_vec_t)(jit_generator&, const RegExp&, const RegExp&); - jit_convert_array(convert_vec_t convert_vec, size_t src_size, size_t dst_size) + jit_convert_array(convert_vec_t convert_vec) : jit_kernel(jit_name()), _convert_vec(convert_vec), - _src_size(src_size), - _dst_size(dst_size) {} + _src_size(sizeof(src_t)), + _dst_size(sizeof(dst_t)) { + const auto type = get_f8_type(); + if (type == f8_type::f8e4m3) { + f8_e4m3_emu_ = std::make_shared(this, + fp8_emu_reserv_1_, + fp8_emu_reserv_2_, + fp8_emu_reserv_3_, + fp8_emu_reserv_4_, + fp8_emu_reserv_5_, + fp8_emu_scratch_); + } else if (type == f8_type::f8e5m2) { + f8_e5m2_emu_ = std::make_shared(this, + fp8_emu_reserv_1_, + fp8_emu_reserv_2_, + fp8_emu_reserv_3_, + fp8_emu_kmask_aux_, + fp8_emu_scratch_); + } + const bool is_dst_bf16 = std::is_same::value; + if (is_dst_bf16 && mayiuse(cpu_isa_t::avx512_core)) { + uni_vcvtneps2bf16_ = std::make_shared(this, cpu_isa_t::avx512_core); + } + } - template static fn_t get() { if (mayiuse(cpu_isa_t::avx2) && dnnl::impl::cpu::x64::cpu().has(Xbyak::util::Cpu::tF16C)) { - static jit_convert_array converter(convert_vec, sizeof(src_t), sizeof(dst_t)); + static jit_convert_array converter(convert_vec); auto& generator = static_cast(converter); generator.create_kernel(); return (fn_t)generator.jit_ker(); @@ -125,16 +168,192 @@ class jit_convert_array : public jit_kernel { return nullptr; } + std::shared_ptr get_f8_e4m3_emu() const { + return f8_e4m3_emu_; + } + + std::shared_ptr get_f8_e5m2_emu() const { + return f8_e5m2_emu_; + } + + std::shared_ptr get_uni_vcvtneps2bf16() const { + return uni_vcvtneps2bf16_; + } + private: convert_vec_t _convert_vec; size_t _src_size; size_t _dst_size; + + std::shared_ptr f8_e4m3_emu_; + std::shared_ptr f8_e5m2_emu_; + std::shared_ptr uni_vcvtneps2bf16_; + + const Reg64 fp8_emu_scratch_ = rax; + const Zmm fp8_emu_reserv_1_ = Zmm(9); + const Zmm fp8_emu_reserv_2_ = Zmm(10); + const Zmm fp8_emu_reserv_3_ = Zmm(11); + const Zmm fp8_emu_reserv_4_ = Zmm(12); + const Zmm fp8_emu_reserv_5_ = Zmm(13); + const Opmask fp8_emu_kmask_aux_ = Opmask(1); }; +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f32vec = gen.zmm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vmovups(f32vec, gen.zword[src]); + cvt.get_f8_e4m3_emu()->vcvt_f32_to_f8(f8vec, f32vec); + gen.vmovdqu(gen.xword[dst], f8vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f32vec = gen.zmm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vmovdqu(f8vec, gen.xword[src]); + 
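+    // 16 packed fp8 bytes arrive in xmm3; the e4m3 emulator widens them to
+    // 16 f32 lanes in zmm4, since no native fp8 conversion instructions are assumed: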
cvt.get_f8_e4m3_emu()->vcvt_f8_to_f32(f32vec, f8vec); + gen.vmovups(gen.zword[dst], f32vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f16vec = gen.ymm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vmovdqu(f16vec, gen.yword[src]); + cvt.get_f8_e4m3_emu()->vcvt_f16_to_f8(f8vec, f16vec); + gen.vmovdqu(gen.xword[dst], f8vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f16vec = gen.ymm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vmovdqu(f8vec, gen.xword[src]); + cvt.get_f8_e4m3_emu()->vcvt_f8_to_f16(f16vec, f8vec); + gen.vmovdqu(gen.yword[dst], f16vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f16vec = gen.zmm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vpmovzxwd(f16vec, gen.yword[src]); + gen.vpslld(f16vec, f16vec, 16); + cvt.get_f8_e4m3_emu()->vcvt_f32_to_f8(f8vec, f16vec); + gen.vmovdqu(gen.xword[dst], f8vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f16vec = gen.ymm4; + auto const& f32vec = gen.zmm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vmovdqu(f8vec, gen.xword[src]); + cvt.get_f8_e4m3_emu()->vcvt_f8_to_f32(f32vec, f8vec); + cvt.get_uni_vcvtneps2bf16()->emit_code({static_cast(f32vec.getIdx())}, + {static_cast(f16vec.getIdx())}); + gen.vmovdqu(gen.yword[dst], f16vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f32vec = gen.zmm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vmovups(f32vec, gen.zword[src]); + cvt.get_f8_e5m2_emu()->vcvt_f32_to_f8(f8vec, f32vec); + gen.vmovdqu(gen.xword[dst], f8vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f32vec = gen.zmm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vmovdqu(f8vec, gen.xword[src]); + cvt.get_f8_e5m2_emu()->vcvt_f8_to_f32(f32vec, f8vec); + gen.vmovups(gen.zword[dst], f32vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f16vec = gen.ymm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vmovdqu(f16vec, gen.yword[src]); + cvt.get_f8_e5m2_emu()->vcvt_f16_to_f8(f8vec, f16vec); + gen.vmovdqu(gen.xword[dst], f8vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f16vec = gen.ymm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vmovdqu(f8vec, gen.xword[src]); + cvt.get_f8_e5m2_emu()->vcvt_f8_to_f16(f16vec, f8vec); + gen.vmovdqu(gen.yword[dst], f16vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f16vec = gen.zmm4; + + auto& cvt = dynamic_cast&>(gen); + + gen.vpmovzxwd(f16vec, gen.yword[src]); + gen.vpslld(f16vec, f16vec, 16); + cvt.get_f8_e5m2_emu()->vcvt_f32_to_f8(f8vec, f16vec); + gen.vmovdqu(gen.xword[dst], f8vec); +} + +template <> +void convert_vec(jit_generator& gen, const RegExp& src, const RegExp& dst) { + auto const& f8vec = gen.xmm3; + auto const& f16vec = gen.ymm4; + auto const& f32vec = gen.zmm4; + + auto& cvt = dynamic_cast&>(gen); + + 
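+    // two-stage path: fp8 -> f32 through the e5m2 emulator, then f32 -> bf16
+    // through the jit_uni_vcvtneps2bf16 emitter created for this kernel: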
gen.vmovdqu(f8vec, gen.xword[src]);
+    cvt.get_f8_e5m2_emu()->vcvt_f8_to_f32(f32vec, f8vec);
+    cvt.get_uni_vcvtneps2bf16()->emit_code({static_cast<size_t>(f32vec.getIdx())},
+                                           {static_cast<size_t>(f16vec.getIdx())});
+    gen.vmovdqu(gen.yword[dst], f16vec);
+}
+
 template <typename TI, typename TO>
 void jit_convert(const TI* arg, TO* out, size_t count) {
-    using jit_impl = jit_convert_array;
-    static auto converter = jit_impl::get<TI, TO>();
+    using jit_impl = jit_convert_array<TI, TO>;
+    static auto converter = jit_impl::get();

     if (converter) {
         typename jit_impl::args_t args = {arg, out, count};
@@ -185,6 +404,12 @@ const std::tuple<T, T>& Range<T>::fit(const ov::element::Type& prec) {
     if (prec.is_real()) {
         double lbound, ubound;
         switch (prec) {
+        case ov::element::f8e4m3:
+            lbound = static_cast<double>(std::numeric_limits<ov::float8_e4m3>::lowest());
+            ubound = static_cast<double>(std::numeric_limits<ov::float8_e4m3>::max());
+            break;
+        case ov::element::f8e5m2:
+            lbound = static_cast<double>(std::numeric_limits<ov::float8_e5m2>::lowest());
+            ubound = static_cast<double>(std::numeric_limits<ov::float8_e5m2>::max());
+            break;
         case ov::element::bf16:
             lbound = static_cast<double>(std::numeric_limits<ov::bfloat16>::lowest());
             ubound = static_cast<double>(std::numeric_limits<ov::bfloat16>::max());
@@ -293,6 +518,18 @@ struct ConvertPrecision<std::tuple<src_t, dst_t>> {
         src_t lbound, ubound;
         std::tie(lbound, ubound) = ctx.range();

+        // Align with the behavior of the ngraph reference and the jit implementation: conversion from f8e4m3-inf
+        // to float should output float-inf instead of f8e4m3-max. Proper handling of special values
+        // (nan, inf, overflow) has already been assured by the conversion process.
+        if (std::is_same<src_t, ov::float8_e4m3>::value || std::is_same<dst_t, ov::float8_e4m3>::value ||
+            std::is_same<src_t, ov::float8_e5m2>::value || std::is_same<dst_t, ov::float8_e5m2>::value) {
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<dst_t>(src[i]);
+            });
+            ctx.converted = true;
+            return;
+        }
+
         if (std::is_integral<src_t>::value || ctx.interimPrc.is_real() || std::is_integral<dst_t>::value) {
             parallel_for(ctx.size, [&](size_t i) {
                 dst[i] = static_cast<dst_t>(std::max(std::min(src[i], ubound), lbound));
@@ -492,6 +729,12 @@ struct ConvertPrecision<std::tuple<src_t, dst_t>> {
             PrecisionInfo<ov::element::ST>::value_type, \
             PrecisionInfo<ov::element::DT>::value_type)

+#define INTEL_CPU_CVT_FP8_LIST                                                                                        \
+    INTEL_CPU_CVT(f32, f8e4m3), INTEL_CPU_CVT(f16, f8e4m3), INTEL_CPU_CVT(bf16, f8e4m3), INTEL_CPU_CVT(f8e4m3, f32), \
+        INTEL_CPU_CVT(f8e4m3, f16), INTEL_CPU_CVT(f8e4m3, bf16), INTEL_CPU_CVT(f32, f8e5m2),                         \
+        INTEL_CPU_CVT(f16, f8e5m2), INTEL_CPU_CVT(bf16, f8e5m2), INTEL_CPU_CVT(f8e5m2, f32),                         \
+        INTEL_CPU_CVT(f8e5m2, f16), INTEL_CPU_CVT(f8e5m2, bf16)
+
 #define INTEL_CPU_CVT_LIST                                                                                            \
     INTEL_CPU_CVT(u8, i8), INTEL_CPU_CVT(u8, u16), INTEL_CPU_CVT(u8, i16), INTEL_CPU_CVT(u8, u32),                    \
         INTEL_CPU_CVT(u8, i32), INTEL_CPU_CVT(u8, u64), INTEL_CPU_CVT(u8, i64), INTEL_CPU_CVT(u8, f32),               \
@@ -535,7 +778,8 @@ struct ConvertPrecision<std::tuple<src_t, dst_t>> {
     INTEL_CPU_CVT(boolean, f16), INTEL_CPU_CVT(boolean, bf16), INTEL_CPU_CVT(boolean, f64), INTEL_CPU_CVT(u8, u8),    \
         INTEL_CPU_CVT(i8, i8), INTEL_CPU_CVT(u16, u16), INTEL_CPU_CVT(i16, i16), INTEL_CPU_CVT(u32, u32),             \
         INTEL_CPU_CVT(i32, i32), INTEL_CPU_CVT(u64, u64), INTEL_CPU_CVT(i64, i64), INTEL_CPU_CVT(f32, f32),           \
-        INTEL_CPU_CVT(f16, f16), INTEL_CPU_CVT(bf16, bf16), INTEL_CPU_CVT(f64, f64), INTEL_CPU_CVT(boolean, boolean)
+        INTEL_CPU_CVT(f16, f16), INTEL_CPU_CVT(bf16, bf16), INTEL_CPU_CVT(f64, f64), INTEL_CPU_CVT(boolean, boolean), \
+        INTEL_CPU_CVT_FP8_LIST

 #define INTEL_CPU_CVT_FROM_BIN_LIST \
     INTEL_CPU_CVT(u1, f32), INTEL_CPU_CVT(u1, f16), INTEL_CPU_CVT(u1, bf16), INTEL_CPU_CVT(u1, f64), \
@@ -667,6 +911,35 @@ struct ConvertFromByteFPPrecision<std::tuple<src_t, dst_t>> {
     }
 };

+#if defined(OPENVINO_ARCH_X86_64)
+struct ConvertFP8Context {
+    const void* srcPtr;
+    void* dstPtr;
+    size_t size;
+    bool converted;
+};
+
+template <typename src_t, typename dst_t>
+struct ConvertFP8Precision;
+
+template <typename src_t, typename dst_t>
+struct ConvertFP8Precision<std::tuple<src_t, dst_t>> {
+    void operator()(ConvertFP8Context& ctx) {
+        auto src = static_cast<const src_t*>(ctx.srcPtr);
+        auto dst = static_cast<dst_t*>(ctx.dstPtr);
+        constexpr size_t batch = 64;
+        const size_t iterations = ov::intel_cpu::div_up(ctx.size, batch);
+        parallel_for(iterations, [&](size_t i) {
+            const size_t offset = i * batch;
+            const size_t current_batch_size = std::min(ctx.size - offset, batch);
+            jit_convert(src + offset, dst + offset, current_batch_size);
+        });
+
+        ctx.converted = true;
+    }
+};
+#endif
+
 void cpu_convert(const void* srcPtr,
                  void* dstPtr,
                  ov::element::Type srcPrc,
@@ -728,7 +1001,7 @@ void cpu_convert(const void* srcPtr,
         OV_SWITCH(intel_cpu, ConvertFrom4BitPrecision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_FROM_4BIT_LIST);
         if (!ctx.converted)
             OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc);
-    } else if (srcPrc.bitwidth() == 8u && srcPrc.is_real()) {
+    } else if (srcPrc == ov::element::f8e8m0) {
         ConvertFromByteFPContext ctx{srcPrc, srcPtr, dstPtr, size, false};
         OV_SWITCH(intel_cpu,
                   ConvertFromByteFPPrecision,
@@ -737,6 +1010,15 @@ void cpu_convert(const void* srcPtr,
                   INTEL_CPU_CVT_FROM_BYTE_FP_LIST);
         if (!ctx.converted)
             OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc);
+#if defined(OPENVINO_ARCH_X86_64)
+    } else if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_fp16) &&
+               (one_of(srcPrc, ov::element::f8e4m3, ov::element::f8e5m2) ||
+                one_of(dstPrc, ov::element::f8e4m3, ov::element::f8e5m2))) {
+        ConvertFP8Context ctx{srcPtr, dstPtr, size, false};
+        OV_SWITCH(intel_cpu, ConvertFP8Precision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_FP8_LIST);
+        if (!ctx.converted)
+            OPENVINO_THROW("cpu_convert can't convert from: ", srcPrc, " precision to: ", dstPrc);
+#endif
     } else {
         ConvertContext ctx{srcPtr, dstPtr, size, interimPrc, dstPrc, false};
         OV_SWITCH(intel_cpu, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_LIST);
diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
index 5daefa01eddfab..c2e770db84695b 100644
--- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp
+++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
@@ -341,8 +341,11 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
                                                          reg_d_bias));
         }

-        if (mayiuse(avx512_core) || mayiuse(avx2_vnni_2))
-            uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa));
+        if (mayiuse(avx512_core) || mayiuse(avx2_vnni_2)) {
+            auto const mode = jep_.do_output_saturation ? jit_uni_vcvtneps2bf16::conversion_mode::saturation_mode
+                                                        : jit_uni_vcvtneps2bf16::conversion_mode::default_mode;
+            uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa, element::bf16, mode));
+        }

         const auto& jep = jep_;

@@ -478,7 +481,11 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener

                 apply_post_ops(true, jep_.oc_size > 1 ? 
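+                // per-channel post ops take the float offset of lane j; store_scalar
+                // below also receives jep.do_output_saturation so the bf16 path can
+                // clamp instead of truncating: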
j * sizeof(float) : 0); - store_scalar(ptr[reg_dst + j * jep.dst_prc.size()], xmm_dst, exec_prc, jep.dst_prc); + store_scalar(ptr[reg_dst + j * jep.dst_prc.size()], + xmm_dst, + exec_prc, + jep.dst_prc, + jep.do_output_saturation); } for (size_t i = 0; i < jep.inputs_number; i++) @@ -546,7 +553,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener apply_post_ops(true); - store_scalar(ptr[reg_dst], xmm_dst, exec_prc, jep.dst_prc); + store_scalar(ptr[reg_dst], xmm_dst, exec_prc, jep.dst_prc, jep.do_output_saturation); for (size_t i = 0; i < jep.inputs_number; i++) if (jep.src_size[i] != 1) @@ -1012,7 +1019,8 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener inline void store_scalar(const Xbyak::Address& op, Xmm xmm_dst, ov::element::Type src_prc, - ov::element::Type dst_prc) { + ov::element::Type dst_prc, + const bool do_output_saturation) { if (src_prc == dst_prc) { switch (src_prc.size()) { case 4: @@ -1047,7 +1055,11 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vmovss(op, xmm_dst); break; case ov::element::bf16: - uni_vpsrld(xmm_dst, xmm_dst, 16); + if (do_output_saturation) + uni_vpsrld(xmm_dst, xmm_dst, 16); + else + uni_vcvtneps2bf16->emit_code({static_cast(xmm_dst.getIdx())}, + {static_cast(xmm_dst.getIdx())}); uni_vpextrw(op, xmm_dst, 0x0); break; case ov::element::f16: @@ -1355,6 +1367,7 @@ struct EltwiseKey { ov::element::Type outPrc; dnnl::post_ops postOps; EltwiseImplType implType; + bool doOutputSaturation; size_t hash() const { using namespace dnnl::impl; @@ -1390,6 +1403,10 @@ struct EltwiseKey { seed = hash_combine(seed, outPrc.hash()); seed = get_post_op_hash(seed, *postOps.get()); seed = hash_combine(seed, implType); + + if (outPrc == ov::element::bf16) { + seed = hash_combine(seed, doOutputSaturation); + } return seed; } @@ -1416,6 +1433,8 @@ struct EltwiseKey { result = result && (inpDims[i] == rhs.inpDims[i]); } } + if (doOutputSaturation != rhs.doOutputSaturation) + return false; } return result; @@ -1448,7 +1467,8 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { const std::vector& inpPrc, const ov::element::Type& outPrc, const dnnl::post_ops& post_ops, - bool useRuntimePtrs) { + bool useRuntimePtrs, + bool doOutputSaturation) { auto collapseLastDims = [](std::vector& dims, int dimsToCollapse) { for (size_t i = dims.size() - 2; i > dims.size() - dimsToCollapse - 2; i--) { dims[dims.size() - 1] *= dims[i]; @@ -1639,6 +1659,7 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { jep.dst_prc = outPrc; jep.work_amount = jep.dst_size = jep.dims.back(); jep.oc_size = oc_size; + jep.do_output_saturation = doOutputSaturation; std::transform(jep.oc_offsets.begin(), jep.oc_offsets.end(), jep.oc_offsets.begin(), [](size_t& offset) { return offset * sizeof(float); @@ -2160,7 +2181,8 @@ static Eltwise::executorPtr buildExecutor(const EltwiseKey& key) { key.inpPrc, key.outPrc, key.postOps, - key.implType == EltwiseImplType::optimizedShapeAgnostic); + key.implType == EltwiseImplType::optimizedShapeAgnostic, + key.doOutputSaturation); } bool Eltwise::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { @@ -2862,6 +2884,18 @@ void Eltwise::prepareParams() { } } + // FP32 constant inputs may contain values out of BF16 representable range. In case output precision is BF16 we + // choose "saturation" mode for fp32->bf16 conversion procedure to prevent getting -Inf/+Inf values in the + // outputs. 
Since "saturation" conversion is more time consuming, better solution would be to clamp constants on + // compilation stage (ticket: 159589). + key.doOutputSaturation = false; + for (size_t i = 0; i < getParentEdges().size(); i++) { + if (getParentEdgeAt(i)->getParent()->isConstant()) { + key.doOutputSaturation = true; + break; + } + } + auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, buildExecutor); execPtr = result.first; diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.h b/src/plugins/intel_cpu/src/nodes/eltwise.h index d0ca94e08824c8..8e5fd643665ffd 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.h +++ b/src/plugins/intel_cpu/src/nodes/eltwise.h @@ -43,6 +43,7 @@ struct jit_eltwise_params { size_t work_amount; bool use_runtime_ptrs; + bool do_output_saturation; }; struct jit_eltwise_call_args_indexes { diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 4bb2f714b284fd..34b659a1ef2882 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -11,6 +11,7 @@ #include "openvino/core/shape.hpp" #include "openvino/core/type/element_type.hpp" #include "shape_inference/shape_inference_pass_through.hpp" +#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp" using namespace dnnl; using namespace dnnl::impl::cpu::x64; @@ -226,7 +227,8 @@ Input::Input(const std::shared_ptr& op, const GraphContext::CPtr conte op::v0::Constant::get_type_info_static(), op::v0::Result::get_type_info_static(), op::v3::ReadValue::get_type_info_static(), - op::v6::ReadValue::get_type_info_static())) + op::v6::ReadValue::get_type_info_static(), + ov::intel_cpu::ReadValueWithSubgraph::get_type_info_static())) OPENVINO_THROW_NOT_IMPLEMENTED("CPU Input node doesn't support ngraph operation ", op->get_type_name(), " with name ", @@ -479,7 +481,11 @@ void Input::selectOptimalPrimitiveDescriptor() { supportedPrimitiveDescriptors.clear(); // and just use parent memory descriptor for Output node to avoid reorders insertion - NodeConfig config({PortConfig(getParentOutputMemDesc(getParentEdgeAt(0)), BlockedMemoryDesc::FULL_MASK, 0)}, {}); + std::vector inConfs; + for (size_t i = 0; i < getParentEdges().size(); i++) { + inConfs.push_back({PortConfig(getParentOutputMemDesc(getParentEdgeAt(i)), BlockedMemoryDesc::FULL_MASK, 0)}); + } + NodeConfig config(inConfs, {}); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); selectPrimitiveDescriptorByIndex(0); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp index 1bf64d096e4a84..c4fb7608d521de 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp @@ -57,6 +57,7 @@ struct jit_eltwise_params { size_t work_amount; bool use_runtime_ptrs; + bool do_output_saturation; }; struct jit_eltwise_call_args_indexes { diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 5a0bd7a1e3dff1..d9c9dba5a1219d 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -10,8 +10,11 @@ #include "dnnl_extension_utils.h" #include "dnnl_types.h" #include "memory_desc/cpu_memory_desc_utils.h" +#include "nodes/common/cpu_convert.h" #include "scaled_attn.h" +#include 
"shape_inference/shape_inference_internal_dyn.hpp" #include "shape_inference/shape_inference_pass_through.hpp" +#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp" #include "utils/general_utils.h" using namespace dnnl; @@ -373,8 +376,10 @@ bool MemoryInputBase::isSupportedOperation(const std::shared_ptr try { if (!one_of(op->get_type_info(), ov::op::v3::ReadValue::get_type_info_static(), - ov::op::v6::ReadValue::get_type_info_static())) { - errorMessage = "Node is not an instance of ReadValue from the operation set v3 or v6."; + ov::op::v6::ReadValue::get_type_info_static(), + ov::intel_cpu::ReadValueWithSubgraph::get_type_info_static())) { + errorMessage = "Node is not an instance of ReadValue from the operation set v3 " + "or v6, or is not an instance of intel_cpu::ReadValueWithSubgraph"; return false; } } catch (...) { @@ -402,22 +407,26 @@ MemoryInputBase::MemoryInputBase(const std::string id, const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc, + const ov::optional>& input_shape, + const ov::optional>& input_prc, MemoryInputBase::mode mode) : Input(output_shape, output_prc, name, type, context), MemoryStateNode(id) { outputShapes.emplace_back(output_shape); addOriginalOutputPrecision(output_prc); if (input_shape) { - inputShapes.push_back(*input_shape); - isDynamic = isDynamic || input_shape->isDynamic(); + for (auto inp_shape : *input_shape) { + inputShapes.push_back(inp_shape); + isDynamic = isDynamic || inp_shape.isDynamic(); + } if (isDynamic && !shapeInference) { shapeInference = PassThroughShapeInferFactory().makeShapeInfer(); } } if (input_prc) { - addOriginalInputPrecision(*input_prc); + for (auto inp_prc : *input_prc) { + addOriginalInputPrecision(inp_prc); + } } if (created()) { context->getMemoryStatesRegister()->registerInput(this); @@ -456,8 +465,11 @@ void MemoryInputBase::initSupportedPrimitiveDescriptors() { NodeConfig config; if (!getParentEdges().empty()) { - const auto& inputShape = getInputShapeAtPort(0); - config.inConfs.emplace_back(descCreators.at(LayoutType::ncsp)->createSharedDesc(precision, inputShape)); + for (size_t i = 0; i < getParentEdges().size(); i++) { + const auto& inputShape = getInputShapeAtPort(i); + auto inp_prc = getOriginalInputPrecisionAtPort(i); + config.inConfs.emplace_back(descCreators.at(LayoutType::ncsp)->createSharedDesc(inp_prc, inputShape)); + } } const auto& outputShape = getOutputShapeAtPort(0); @@ -562,6 +574,47 @@ void MemoryInputBase::bypassAssignState() { return; } +MemoryInput::MemoryInput(const std::shared_ptr& op, const GraphContext::CPtr ctx) + : MemoryInputBase::MemoryInputBase(op, ctx) { + auto rvWithSubgraph = ov::as_type_ptr(op); + if (rvWithSubgraph) { + body = rvWithSubgraph->get_function(); + subGraph = make_unique(); + if (isDynamic) { + shapeInference = InternalDynShapeInferFactory().makeShapeInfer(); + } + } +} + +MemoryInput::MemoryInput(const std::string id, + const std::string& name, + const std::string& type, + const Shape& output_shape, + const ov::element::Type& output_prc, + const GraphContext::CPtr context, + const ov::optional>& input_shape, + const ov::optional>& input_prc, + std::shared_ptr func, + mode mode) + : MemoryInputBase::MemoryInputBase(id, + name, + type, + output_shape, + output_prc, + context, + input_shape, + input_prc, + mode) { + body = func; + + if (haveSubgraph()) { + subGraph = make_unique(); + if (isDynamic) { + shapeInference = 
InternalDynShapeInferFactory().makeShapeInfer();
+        }
+    }
+}
+
 bool MemoryInput::needInitGraphProcessing() const {
     return !getParentEdges().empty() && getAssignedState()->is_reset_state();
 }
@@ -620,6 +673,59 @@ void MemoryInput::initOptimalPrimitiveDescriptor() {
     config.outConfs.front().setMemDesc(mem_desc);
     // bypass any checks, we enforce the child descriptor
     selectedPd->setConfig(config);
+
+    if (haveSubgraph()) {
+        // Adopt the parent configuration to avoid inserting a reorder before the MemoryInput.
+        std::vector graphInputConfig;
+
+        for (size_t i = 0; i < getParentEdges().size(); i++) {
+            auto desc = getParentOutputMemDesc(getParentEdgeAt(i));
+            graphInputConfig.emplace_back(node::Input::InputConfig{desc, true});
+        }
+
+        std::vector graphOutputConfig;
+        for (auto&& portConfig : config.outConfs) {
+            auto desc = portConfig.getMemDesc();
+            graphOutputConfig.emplace_back(node::Input::OutputConfig{desc, true});
+        }
+
+        // configure the inner graph to get the information about output memory descriptors
+        subGraph->Init(body, context, graphInputConfig, graphOutputConfig);
+    }
+}
+
+// @todo add ascii diagram for memory mapping / reuse
+void MemoryInput::createPrimitive() {
+    MemoryInputBase::createPrimitive();
+    if (haveSubgraph()) {
+        OPENVINO_ASSERT(getOriginalInputsNumber() == subGraph->inputsNumber(),
+                        "Number of node inputs must be equal to the number of inner graph's inputs: ",
+                        getOriginalInputsNumber(),
+                        " != ",
+                        subGraph->inputsNumber());
+
+        std::vector inputMemory;
+        for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
+            auto srcEdgeMem = getSrcMemoryAtPort(i);
+            // create separate input memory objects instead of sharing them, to avoid data corruption
+            auto mem = std::make_shared(getEngine(), srcEdgeMem->getDescPtr(), srcEdgeMem->getMemoryBlock());
+            subgraphMemoryPtrs.push_back(mem);
+            inputMemory.emplace_back(std::move(mem));
+        }
+
+        OPENVINO_ASSERT(getOriginalOutputsNumber() == subGraph->outputsNumber(),
+                        "Number of node outputs must be equal to the number of inner graph's outputs: ",
+                        getOriginalOutputsNumber(),
+                        " != ",
+                        subGraph->outputsNumber());
+
+        std::vector outputMemory;
+        for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
+            outputMemory.emplace_back(getDstMemoryAtPort(i));
+        }
+
+        subGraph->Activate(inputMemory, outputMemory);
+    }
 }
 void MemoryInput::runDynamic(dnnl::stream strm) {
@@ -655,13 +761,43 @@ void MemoryInput::runDynamic(dnnl::stream strm) {
         memBlock->reset();
     }
-    // reshape output
-    const auto& newDims = processInitGraph ? getSrcMemoryAtPort(0)->getStaticDims() : stateDims;
+    MemoryPtr src = assignedMem;  // declare src memory
+    if (processInitGraph) {
+        if (haveSubgraph()) {
+            // put the PrepareParams logic into runDynamic, because the init graph is not called each time.
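+            // The inner graph reuses the memory blocks wired up in createPrimitive(), so only the input
+            // descriptors have to be refreshed here to match the shapes of the current source memory.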
+            for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
+                // since the external and internal descriptors are compatible, we may pass the descriptor
+                subgraphMemoryPtrs[i]->redefineDesc(getSrcMemoryAtPort(i)->getDescPtr());
+            }
+
+            subGraph->ResetInferCount();
+            subGraph->Infer();
+            // depending on the memory sharing solution, we can either return here, if the memory is substituted
+            // from the external graph, or override the src pointer with the memory pointer pointing to the
+            // subgraph output memory
+            OPENVINO_ASSERT(subGraph->outputsNumber() == 1);
+            src = subGraph->getOutputNodeByIndex(0)->getSrcMemoryAtPort(0);
+
+            // since the shape inference (InternalDynShapeInfer, which does nothing) has been performed, the memory
+            // of the extra child edges attached to the output ports has to be updated after the inference of the
+            // inner graph has finished
+            auto& childEdges = getChildEdges();
+            for (size_t j = 1; j < childEdges.size(); j++) {
+                auto& childEdge = childEdges[j];
+                auto childEdgePtr = childEdge.lock();
+                assert(childEdgePtr);
+                assert(0 == childEdgePtr->getInputNum());
+                childEdgePtr->getMemoryPtr()->redefineDesc(src->getDescPtr());
+            }
+        } else {
+            src = getSrcMemoryAtPort(0);
+        }
+    }
+    // reshape output
+    const auto& newDims = src->getStaticDims();
     redefineOutputMemory(0, newDims);
     // copy data when necessary
-    auto src = processInitGraph ? getSrcMemoryAtPort(0) : assignedMem;
     if (src->getData() != dst->getData()) {
         dst->load(*src);
     }
@@ -692,10 +828,21 @@ void MemoryInput::runStatic(dnnl::stream strm) {
         memBlock->reset();
     }
-    const auto processInitGraph = needInitGraphProcessing();
+    const bool processInitGraph = needInitGraphProcessing();
+    MemoryPtr src = assignedMem;  // declare src memory
+    if (processInitGraph) {
+        if (haveSubgraph()) {
+            subGraph->ResetInferCount();
+            subGraph->Infer();
+
+            OPENVINO_ASSERT(subGraph->outputsNumber() == 1);
+            src = subGraph->getOutputNodeByIndex(0)->getSrcMemoryAtPort(0);
+        } else {
+            src = getSrcMemoryAtPort(0);
+        }
+    }
     // copy data when necessary
-    auto src = processInitGraph ?
getSrcMemoryAtPort(0) : assignedMem; auto dst = getDstMemoryAtPort(0); if (src->getData() != dst->getData()) { dst->load(*src); @@ -749,6 +896,10 @@ MemStatePtr MemoryInput::makeState() const { original_desc); } +std::shared_ptr MemoryInput::getSubGraph() { + return body; +} + bool MemoryInput::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { return MemoryInputBase::isSupportedOperation(op, errorMessage); } @@ -759,8 +910,8 @@ MemoryInputSDPA::MemoryInputSDPA(const std::string id, const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc, + const ov::optional>& input_shape, + const ov::optional>& input_prc, const std::shared_ptr& sdpaNode) : MemoryInputBase(id, name, type, output_shape, output_prc, context, input_shape, input_prc), m_sdpaNode(sdpaNode) {} @@ -865,8 +1016,9 @@ MemoryInputSingle::MemoryInputSingle(const std::string id, const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc) + const ov::optional>& input_shape, + const ov::optional>& input_prc, + std::shared_ptr func) : MemoryInput(id, name, type, @@ -875,6 +1027,7 @@ MemoryInputSingle::MemoryInputSingle(const std::string id, context, input_shape, input_prc, + func, MemoryInputBase::mode::single_read_value) {} MemStatePtr MemoryInputSingle::makeState() const { diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index 9c0c9664ce8a27..1d40849b0f3356 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -4,6 +4,8 @@ #pragma once +#include + #include #include "input.h" @@ -162,8 +164,8 @@ class MemoryInputBase : public Input, public MemoryStateNode { const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc, + const ov::optional>& input_shape, + const ov::optional>& input_prc, mode mode = mode::read_value_assign); protected: @@ -192,15 +194,30 @@ class MemoryInputBase : public Input, public MemoryStateNode { class MemoryInput : public MemoryInputBase { public: - using MemoryInputBase::MemoryInputBase; + MemoryInput(const std::shared_ptr& op, const GraphContext::CPtr ctx); + MemoryInput(const std::string id, + const std::string& name, + const std::string& type, + const Shape& output_shape, + const ov::element::Type& output_prc, + const GraphContext::CPtr context, + const ov::optional>& input_shape, + const ov::optional>& input_prc, + std::shared_ptr func = nullptr, + mode mode = mode::read_value_assign); + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void initOptimalPrimitiveDescriptor() override; void resolveInPlaceEdges(Edge::LOOK look) override; + void createPrimitive() override; + MemStatePtr makeState() const override; + std::shared_ptr getSubGraph(); + protected: bool needInitGraphProcessing() const; void runStatic(dnnl::stream strm) override; @@ -210,7 +227,15 @@ class MemoryInput : public MemoryInputBase { void assignStateHook() override { /*pass*/ } + bool haveSubgraph() const { + return body != nullptr; + } + private: + std::shared_ptr body = nullptr; + std::unique_ptr subGraph = nullptr; + std::vector subgraphMemoryPtrs; + ProxyMemoryBlockPtr memBlock = nullptr; }; @@ -222,8 +247,9 @@ class MemoryInputSingle : 
public MemoryInput { const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc); + const ov::optional>& input_shape, + const ov::optional>& input_prc, + std::shared_ptr func); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; @@ -242,8 +268,8 @@ class MemoryInputSDPA : public MemoryInputBase { const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc, + const ov::optional>& input_shape, + const ov::optional>& input_prc, const std::shared_ptr& sdpaNode); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; diff --git a/src/plugins/intel_cpu/src/nodes/memory_state_base.cpp b/src/plugins/intel_cpu/src/nodes/memory_state_base.cpp index 58d855a091d716..19d4863c3afbcb 100644 --- a/src/plugins/intel_cpu/src/nodes/memory_state_base.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory_state_base.cpp @@ -11,10 +11,8 @@ using namespace ov::intel_cpu::node; MemoryNode::MemoryNode(const std::shared_ptr& op) { - if (auto assignOp = ov::as_type_ptr(op)) { + if (auto assignOp = std::dynamic_pointer_cast(op)) { m_id = assignOp->get_variable_id(); - } else if (auto readValueOp = ov::as_type_ptr(op)) { - m_id = readValueOp->get_variable_id(); } else { OPENVINO_THROW("Unexpected ov::Node type: ", op->get_type_info().name, " in MemoryNode"); } diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index db55c728df725e..b3c2aa0b298a5a 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -218,6 +218,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ov::element::Type_t::i4, ov::element::Type_t::u8, ov::element::Type_t::i8, + ov::element::Type_t::f8e4m3, + ov::element::Type_t::f8e5m2, ov::element::Type_t::u16, ov::element::Type_t::i16, ov::element::Type_t::u32, diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp index 947d7ee476bc81..4b0bb0e8c81f8a 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp @@ -82,7 +82,7 @@ class ConvertReduceSum : public ConvertReduceMultiAxisBase { class ConvertReduceMultiAxis : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("ConvertReduceMultiAxis", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("ConvertReduceMultiAxis"); ConvertReduceMultiAxis() { add_matcher(); add_matcher(); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_no_keep_dims.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_no_keep_dims.hpp index 6cc683154cc175..9684a047afa08e 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_no_keep_dims.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_no_keep_dims.hpp @@ -59,7 +59,7 @@ class ConvertReduction : public ConvertReduceNoKeepDimsBase { class ConvertReduceNoKeepDims : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("ConvertReduceNoKeepDims", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("ConvertReduceNoKeepDims"); ConvertReduceNoKeepDims() { add_matcher>(); 
add_matcher>();
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.cpp
new file mode 100644
index 00000000000000..39df4b6a29c099
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.cpp
@@ -0,0 +1,114 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "read_value_with_subgraph.hpp"
+
+#include "itt.hpp"
+#include "transformations/itt.hpp"
+
+ov::intel_cpu::ReadValueWithSubgraph::ReadValueWithSubgraph(const std::shared_ptr& variable,
+                                                            std::shared_ptr body) {
+    m_variable = variable;
+    set_function(body);
+}
+
+ov::intel_cpu::ReadValueWithSubgraph::ReadValueWithSubgraph(const std::shared_ptr& variable,
+                                                            std::shared_ptr body,
+                                                            const OutputVector& args)
+    : ReadValueWithSubgraph(variable, body) {
+    set_arguments(args);
+}
+
+std::string ov::intel_cpu::ReadValueWithSubgraph::get_variable_id() const {
+    OPENVINO_ASSERT(m_variable, "Variable is not initialized. Variable_id is unavailable");
+    return get_variable()->get_info().variable_id;
+}
+
+void ov::intel_cpu::ReadValueWithSubgraph::set_input(const Output& value,
+                                                     const std::shared_ptr& body_parameter) {
+    OPENVINO_ASSERT(body_parameter != nullptr, "Missing parameter! Parameter is nullptr!");
+    auto param_index = m_bodies[0]->get_parameter_index(body_parameter);
+
+    OPENVINO_ASSERT(param_index != -1, "Missing parameter ", body_parameter->get_friendly_name(), " for \'body\'!");
+
+    set_invariant_inputs(value, {body_parameter});
+}
+
+ov::Output ov::intel_cpu::ReadValueWithSubgraph::set_output(
+    const std::shared_ptr& body_result) {
+    OPENVINO_ASSERT(body_result != nullptr, "Incorrect result in \"body\"! Result can't be \'nullptr\'");
+    auto result_id = m_bodies[0]->get_result_index(body_result);
+
+    OPENVINO_ASSERT(result_id != -1, "Missing result ", body_result->get_friendly_name(), " in \'body\'!");
+
+    return set_body_outputs({body_result});
+}
+
+std::shared_ptr ov::intel_cpu::ReadValueWithSubgraph::clone_with_new_inputs(
+    const OutputVector& new_args) const {
+    INTERNAL_OP_SCOPE(intel_cpu_ReadValueWithSubgraphNode_clone_with_new_inputs);
+
+    check_new_args_count(this, new_args);
+    auto op =
+        std::make_shared(this->get_variable(), get_function()->clone(), new_args);
+    OPENVINO_ASSERT(op != nullptr,
+                    "Cannot clone ",
+                    description(),
+                    " operation with name ",
+                    get_friendly_name());
+    op->set_output_size(m_output_descriptions[0].size());
+    for (const auto& m_input_descr : m_input_descriptions[0]) {
+        op->m_input_descriptions[0].push_back(m_input_descr->copy());
+    }
+    for (const auto& m_output_descr : m_output_descriptions[0]) {
+        op->m_output_descriptions[0].push_back(m_output_descr->copy());
+    }
+    op->validate_and_infer_types();
+    return op;
+}
+
+bool ov::intel_cpu::ReadValueWithSubgraph::visit_attributes(AttributeVisitor& visitor) {
+    INTERNAL_OP_SCOPE(intel_cpu_ReadValueWithSubgraphNode_visit_attributes);
+    visitor.on_attribute("variable_id", m_variable);
+
+    auto variable_info = m_variable->get_info();
+    visitor.on_attribute("variable_type", variable_info.data_type);
+    visitor.on_attribute("variable_shape", variable_info.data_shape);
+    m_variable->update(variable_info);
+
+    visitor.on_attribute("body", m_bodies[0]);
+    visitor.on_attribute("inputs", m_input_descriptions[0]);
+    visitor.on_attribute("outputs", m_output_descriptions[0]);
+    return true;
+}
+
+void ov::intel_cpu::ReadValueWithSubgraph::validate_and_infer_types() {
+    INTERNAL_OP_SCOPE(intel_cpu_ReadValueWithSubgraphNode_validate_and_infer_types);
+
+    NODE_VALIDATION_CHECK(this,
+                          m_bodies.size() == 1,
+                          "ReadValueWithSubgraph contains incorrect number of bodies:",
+                          m_bodies.size());
+
+    validate_and_infer_type_body(get_function(), m_input_descriptions[0]);
+
+    auto output_nodes = outputs();
+
+    auto outputs_map = get_mapping_outputs_on_body_description(m_output_descriptions[0]);
+
+    // Checking each output
+    for (size_t output_index = 0; output_index < output_nodes.size(); ++output_index) {
+        NODE_VALIDATION_CHECK(this,
+                              outputs_map.count(output_index) != 0,
+                              "Incorrect association in body! Output ",
+                              output_index,
+                              " is not associated with results in the body!");
+
+        auto desc = outputs_map.at(output_index);
+
+        auto node_result = m_bodies[0]->get_results().at(desc->m_body_value_index)->input_value(0);
+
+        set_output_type(output_index, node_result.get_element_type(), node_result.get_partial_shape());
+    }
+}
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.hpp
new file mode 100644
index 00000000000000..037f8eb302afcd
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.hpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/op/op.hpp"
+#include "openvino/op/read_value.hpp"
+#include "openvino/op/util/sub_graph_base.hpp"
+#include "transformations/cpu_opset/common/op/submodel.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+class ReadValueWithSubgraph : public ov::op::util::SubGraphOp, public ov::op::util::VariableExtension {
+public:
+    OPENVINO_OP("ReadValueWithSubgraph", "cpu_plugin_opset");
+
+    ReadValueWithSubgraph() = default;
+    ReadValueWithSubgraph(const std::shared_ptr& variable, std::shared_ptr body);
+    ReadValueWithSubgraph(const std::shared_ptr& variable,
+                          std::shared_ptr body,
+                          const OutputVector& args);
+
+    std::string get_variable_id() const override;
+
+    void set_input(const Output& value, const std::shared_ptr& body_parameter);
+
+    Output set_output(const std::shared_ptr& body_result);
+
+    std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override;
+    bool visit_attributes(AttributeVisitor& visitor) override;
+    void validate_and_infer_types() override;
+};
+
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.hpp
index f9db370aea49bb..4a46a042722a12 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.hpp
@@ -11,9 +11,9 @@ namespace intel_cpu {
 class CausalMaskPreprocessFusion : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("CausalMaskPreprocessFusion", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("CausalMaskPreprocessFusion");
     CausalMaskPreprocessFusion();
 };
 }  // namespace intel_cpu
-}  // namespace ov
\ No newline at end of file
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.cpp
new file mode 100644
index 00000000000000..e2b283e65c8615
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.cpp
@@ -0,0 +1,164 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "move_readvalue_inputs_to_subgraph.hpp"
+
+#include
+
+#include "itt.hpp"
+#include "openvino/core/rt_info.hpp"
+#include "openvino/pass/constant_folding.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "ov_ops/rotary_positional_embeddings.hpp"
+#include
"transformations/cpu_opset/common/op/read_value_with_subgraph.hpp" +#include "transformations/cpu_opset/common/op/sdpa.hpp" +#include "transformations/cpu_opset/common/op/submodel.hpp" +#include "transformations/rt_info/disable_fp16_compression.hpp" +#include "transformations/utils/gen_pattern.hpp" +#include "transformations/utils/utils.hpp" + +ov::intel_cpu::MoveReadValueInputsToSubgraph::MoveReadValueInputsToSubgraph() { + MATCHER_SCOPE(MoveReadValueInputsToSubgraph); + using namespace ov::pass::pattern; + + auto readvalue_pattern = pass::pattern::wrap_type(); + + ov::matcher_pass_callback callback = [=](Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto readvalue = as_type_ptr(pattern_map.at(readvalue_pattern).get_node_shared_ptr()); + if (!readvalue || readvalue->get_input_size() != 1u) { + return false; + } + + if (readvalue->get_rt_info().count("DisableInitSubgraphFusing") && + readvalue->get_rt_info()["DisableInitSubgraphFusing"].as()) { + return false; + } + + NodeVector subgraph_nodes; + std::unordered_set> visited_path_to_output; // Cache nodes which connect to Output. + std::unordered_set> visited_path_to_rv; // Cache nodes which connect to ReadValue. + NodeVector inputs = {}; + OutputVector outputs = {}; + + // DFS, Check if current node's final successor is only ReadValue. + std::function, bool&)> dfs = [&](std::shared_ptr node, + bool& found_output) { + if (found_output) { + return; + } + + if (visited_path_to_output.find(node) != visited_path_to_output.end()) { + found_output = true; + return; + } + + if (visited_path_to_rv.find(node) != visited_path_to_rv.end()) { + return; + } + + // node is Output + if (node->get_output_target_inputs(0).size() == 0u) { + found_output = true; + return; + } + + bool any_child_on_output_path = false; + for (const auto& child : node->get_output_target_inputs(0)) { + auto son = child.get_node()->shared_from_this(); + if (son == readvalue) { + continue; + } + + bool new_found_output = false; + dfs(son, new_found_output); + if (new_found_output) { + any_child_on_output_path = true; + } + } + + if (any_child_on_output_path) { + visited_path_to_output.insert(node); + found_output = any_child_on_output_path; + } + }; + + std::function)> reverse_dfs = [&](std::shared_ptr node) { + if (visited_path_to_output.find(node) != visited_path_to_output.end()) { + inputs.emplace_back(node); + return; + } + + if (visited_path_to_rv.find(node) != visited_path_to_rv.end()) { + return; + } + + if (ov::op::util::is_parameter(node)) { + inputs.emplace_back(node); + return; + } + + // Check if the current node has path(bypassing the ReadValue node) to the Output node via dfs algorithm. + bool found_output = false; // Flag: find Output node + dfs(node, found_output); + + if (found_output) { + inputs.emplace_back(node); + visited_path_to_output.insert(node); + return; + } + + visited_path_to_rv.insert(node); + + // Cache to subgraph_nodes + subgraph_nodes.emplace_back(node); + + for (size_t i = 0; i < node->get_input_size(); i++) { + reverse_dfs(node->get_input_node_shared_ptr(i)); + } + }; + + // Reverse DFS ReadValue, find all suitable nodes and move them to subgraph_nodes. 
+ reverse_dfs(readvalue->get_input_node_shared_ptr(0)); + + if (inputs.size() == 0 || subgraph_nodes.size() == 0) { + return false; + } + + // Subgraph's input + auto params = ParameterVector{}; + for (auto inp : inputs) { + auto param = + std::make_shared(inp->get_element_type(), inp->get_output_partial_shape(0)); + params.push_back(param); + for (const auto& child : inp->get_output_target_inputs(0)) { + auto it = std::find(subgraph_nodes.begin(), subgraph_nodes.end(), child.get_node()->shared_from_this()); + if (it != subgraph_nodes.end()) { + child.replace_source_output(param); + } + } + } + + // Subgraph's output + auto last_node = readvalue->get_input_node_shared_ptr(0); + auto output = std::make_shared(last_node); + auto func = std::make_shared(ov::ResultVector({output}), params, "state_init_submodel"); + + auto new_rv = std::make_shared(readvalue->get_variable(), func); + + for (size_t i = 0; i < inputs.size(); i++) { + new_rv->set_input(inputs[i]->output(0), params[i]); + } + new_rv->set_output(output); + + // Replace ReadValue with ov::intel_cpu::ReadValueWithSubgraph + ov::replace_node(readvalue, new_rv); + ov::copy_runtime_info(subgraph_nodes, new_rv); + new_rv->validate_and_infer_types(); + return true; + }; + + auto m = std::make_shared(readvalue_pattern, matcher_name); + this->register_matcher(m, callback); +} \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.hpp new file mode 100644 index 00000000000000..220003cc83ead1 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace intel_cpu { + +/** + * @brief Move ReadValue's inputs inside the new CPU ngraph node:ReadValueWithSubgraph op. 
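 * Only producers whose every consumer path ends in the ReadValue are moved into the body; their external
 * inputs become inputs of the new op, as sketched below: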
+ *          input1
+ *             |
+ *   Some nodes (they have only one common successor [ReadValue])                     input1
+ *             |                                                                         |
+ *         ReadValue                      ------->                    ReadValueWithSubgraph (subgraph is inside)
+ *         |       \                                                          |       \
+ *      Assign    others                                                   Assign    others
+ */
+
+class MoveReadValueInputsToSubgraph : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("MoveReadValueInputsToSubgraph", "0");
+    MoveReadValueInputsToSubgraph();
+};
+
+}  // namespace intel_cpu
+}  // namespace ov
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.hpp
index a63a3dce8219c2..4cfc27d7836180 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rnn_sequences_optimization.hpp
@@ -29,7 +29,7 @@ class OptimizeRNNSequenceTransposes : public ov::pass::MatcherPass {
 class OptimizeSequenceTransposes : public ov::pass::GraphRewrite {
 public:
-    OPENVINO_RTTI("OptimizeSequenceTransposes", "0");
+    OPENVINO_GRAPH_REWRITE_RTTI("OptimizeSequenceTransposes");
     OptimizeSequenceTransposes();
 };
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp
index 0ec2049d1ccc1c..447adb0b2fe23f 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp
@@ -270,6 +270,13 @@ StatefulSDPAFusion::StatefulSDPAFusion() {
         else
             assign_v_node->set_arguments({new_node->output(2)});
+        // Markup pattern:
+        // ReadValue->Convert(Optional)->ScaledDotProductAttentionWithKVCache->Convert(Optional)->Assign, so that
+        // ReadValue can't be replaced with ReadValueWithSubgraph in this pattern.
+        // TODO: Temporarily skip this pattern. If MemoryInputSDPA supports Subgraph in the future, it may be deleted.
+        past_k_node->get_rt_info()["DisableInitSubgraphFusing"] = true;
+        past_v_node->get_rt_info()["DisableInitSubgraphFusing"] = true;
+
         return true;
     };
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
index 614f7d690f8726..5142ee319ac523 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
@@ -10,6 +10,7 @@
 #include "common/pass/convert_to_swish_cpu.hpp"
 #include "common/pass/fc_bias_fusion.hpp"
 #include "common/pass/move_fc_reshape_to_weights.hpp"
+#include "common/pass/move_readvalue_inputs_to_subgraph.hpp"
 #include "common/pass/rnn_sequences_optimization.hpp"
 #include "config.h"
 #include "itt.hpp"
@@ -70,6 +71,7 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr& model, const C
                               false);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::Validate);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::EliminateConvert);  // Need to clean up after the ConvertPrecision.
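+    // Register the ReadValue input fusing last, so the matcher runs on the graph produced by all of the
+    // preceding CPU-specific rewrites and precision clean-ups.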
+ CPU_REGISTER_PASS_COMMON(manager, MoveReadValueInputsToSubgraph); manager.run_passes(model); } diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mha_fusion.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mha_fusion.hpp index fe4f4ccae04f1c..d84c11af9801e5 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mha_fusion.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/mha_fusion.hpp @@ -54,7 +54,7 @@ class MHAQuantFusion2 : public MHAFusionBase { class MHAFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("MHAFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("MHAFusion"); MHAFusion() { add_matcher(); add_matcher(); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index a63377312ecb95..4d7df9a335e98a 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -37,6 +37,7 @@ #include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/common_optimizations/reshape_prelu.hpp" #include "transformations/common_optimizations/rms_fusion.hpp" +#include "transformations/common_optimizations/sdpa_fusion.hpp" #include "transformations/common_optimizations/transpose_sinking.hpp" #include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp" #include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp" @@ -79,6 +80,7 @@ #include "transformations/op_conversions/detection_output_downgrade.hpp" #include "transformations/op_conversions/detection_output_upgrade.hpp" #include "transformations/op_conversions/eye_decomposition.hpp" +#include "transformations/op_conversions/fake_convert_decomposition.hpp" #include "transformations/op_conversions/fq_decomposition.hpp" #include "transformations/op_conversions/gelu7_downgrade.hpp" #include "transformations/op_conversions/group_normalization_decomposition.hpp" @@ -695,6 +697,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_DISABLE_PASS_COMMON(manager, ov::pass::MatMulConstTransposesExtraction); CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertScatterNDUpdate15ToScatterNDUpdate3); CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertSliceScatter); + CPU_DISABLE_PASS_COMMON(manager, ov::pass::SDPAFusion); CPU_DISABLE_PASS_X64(manager, ov::pass::HSigmoidDecomposition); CPU_DISABLE_PASS_X64(manager, ov::pass::ReduceL1Decomposition); @@ -1291,6 +1294,7 @@ void Transformations::PostSnippets(void) { return node::FakeQuantize::isSupportedOperation(node, errMsg); }, ov::pass::FakeQuantizeDecomposition); + CPU_REGISTER_PASS_COMMON(postSnippetsManager, ov::pass::FakeConvertDecomposition); CPU_REGISTER_PASS_COMMON(postSnippetsManager, ov::pass::ConstantFolding); postSnippetsManager.run_passes(model); } diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp index 4989fb3a0f04b7..a3c1f9ef7d3544 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.cpp @@ -16,11 +16,45 @@ using namespace CPUTestUtils; namespace ov { namespace test { +static std::string special_value_to_string(const ov::test::SpecialValue& value) { + 
if (value == SpecialValue::none) { + return "none"; + } else if (value == SpecialValue::nan) { + return "nan"; + } else if (value == SpecialValue::inf) { + return "inf"; + } else if (value == SpecialValue::overflow) { + return "overflow"; + } + return "unknown"; +} + +template +static T set_special_value(T& value, const ov::test::SpecialValue& special_value) { + if (special_value == ov::test::SpecialValue::nan) { + value = NAN; + } else if (special_value == ov::test::SpecialValue::inf) { + value = INFINITY; + } else if (special_value == ov::test::SpecialValue::overflow) { + value = value + std::numeric_limits::max(); + } + return value; +} + +template +static void modify_value(ov::Tensor& tensor, const ov::test::SpecialValue& special_value) { + T* dataPtr = static_cast(tensor.data()); + for (size_t i = 0; i < tensor.get_size(); i++) { + set_special_value(dataPtr[i], special_value); + } +} + std::string ConvertCPULayerTest::getTestCaseName(testing::TestParamInfo obj) { InputShape inputShape; ov::element::Type inPrc, outPrc; + ov::test::SpecialValue special_value; CPUSpecificParams cpuParams; - std::tie(inputShape, inPrc, outPrc, cpuParams) = obj.param; + std::tie(inputShape, inPrc, outPrc, special_value, cpuParams) = obj.param; std::ostringstream result; @@ -30,6 +64,7 @@ std::string ConvertCPULayerTest::getTestCaseName(testing::TestParamInfo(inPrc, shape)); @@ -101,6 +146,31 @@ void ConvertCPULayerTest::SetUp() { function = makeNgraphFunction(inPrc, params, conversion, "ConversionCPU"); } +void ConvertCPULayerTest::generate_inputs(const std::vector& targetInputStaticShapes) { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor = + ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + if (special_value != ov::test::SpecialValue::none) { + if (inPrc == ov::element::f32) { + modify_value(tensor, special_value); + } else if (inPrc == ov::element::f16) { + modify_value(tensor, special_value); + } else if (inPrc == ov::element::bf16) { + modify_value(tensor, special_value); + } else if (inPrc == ov::element::f8e4m3) { + modify_value(tensor, special_value); + } else if (inPrc == ov::element::f8e5m2) { + modify_value(tensor, special_value); + } + } + + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } +} + void ConvertCPULayerTest::validate_out_prc() const { if (outPrc == ov::element::boolean) FAIL() << "ConvertCPULayerTest supports only non boolean output prc"; diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.hpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.hpp index a53f56f873151c..a4f4e0fc56c238 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.hpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/conversion.hpp @@ -13,9 +13,12 @@ using namespace CPUTestUtils; namespace ov { namespace test { +enum SpecialValue { none, nan, inf, overflow }; + using convertLayerTestParamsSet = std::tuple; class ConvertCPULayerTest : public testing::WithParamInterface, @@ -25,9 +28,12 @@ class ConvertCPULayerTest : public testing::WithParamInterface& targetInputStaticShapes) override; virtual void validate_out_prc() const; ov::element::Type inPrc, outPrc; +private: + ov::test::SpecialValue special_value; }; class ConvertToBooleanCPULayerTest : public ConvertCPULayerTest { 
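For reference, a minimal standalone sketch (plain C++, not part of the patch; the helper names are illustrative) of the rounding behavior that the overflow/inf special values above are meant to exercise: a round-to-nearest fp32->bf16 conversion, as vcvtneps2bf16 performs, can carry large finite values into the Inf encoding, while plain truncation of the low 16 bits never leaves the finite range.

#include <cfloat>
#include <cstdint>
#include <cstdio>
#include <cstring>

// bf16 keeps the upper 16 bits of an fp32 value; truncation rounds toward zero,
// so a finite input always stays finite.
static uint16_t bf16_truncate(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return static_cast<uint16_t>(bits >> 16);
}

// Round-to-nearest-even on the 16 dropped bits (NaN handling omitted for brevity);
// values close to FLT_MAX round up into the Inf encoding (exponent field all ones).
static uint16_t bf16_rne(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    const uint32_t lsb = (bits >> 16) & 1u;
    bits += 0x7FFFu + lsb;
    return static_cast<uint16_t>(bits >> 16);
}

int main() {
    const float big = FLT_MAX;  // finite in fp32, not representable in bf16
    std::printf("rne:      0x%04x\n", bf16_rne(big));       // 0x7f80 == +Inf
    std::printf("truncate: 0x%04x\n", bf16_truncate(big));  // 0x7f7f == largest finite bf16
    return 0;
}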
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/conversion.cpp index 11e0440b2e3618..e5d87f5cb2f3dd 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/conversion.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/conversion.cpp @@ -16,6 +16,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_7D_Dynamic, ConvertCPULayerTe ::testing::ValuesIn(inShapes_7D_dynamic()), ::testing::ValuesIn(precisions()), ::testing::ValuesIn(precisions()), + ::testing::Values(ov::test::SpecialValue::none), ::testing::Values(CPUSpecificParams({}, {}, {}, {}))), ConvertCPULayerTest::getTestCaseName); @@ -24,6 +25,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_7D_Static, ConvertCPULayerTes ::testing::ValuesIn(inShapes_7D_static()), ::testing::ValuesIn(precisions()), ::testing::ValuesIn(precisions()), + ::testing::Values(ov::test::SpecialValue::none), ::testing::Values(CPUSpecificParams({}, {}, {}, {}))), ConvertCPULayerTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp index 59ca1065bf78d9..8181304bf95e7d 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/conversion.cpp @@ -31,6 +31,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_4D_Dynamic, ConvertCPULayerTe ::testing::ValuesIn(inShapes_4D_dynamic()), ::testing::ValuesIn(precisions()), ::testing::ValuesIn(precisions()), + ::testing::Values(ov::test::SpecialValue::none), ::testing::ValuesIn(memForm4D_dynamic)), ConvertCPULayerTest::getTestCaseName); @@ -39,6 +40,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_4bit_Dynamic, ConvertCPULayer ::testing::Combine(::testing::ValuesIn(inShapes_4D_dynamic()), ::testing::ValuesIn({ov::element::u4, ov::element::i4}), ::testing::ValuesIn({ov::element::f32, ov::element::bf16, ov::element::u8, ov::element::i8}), + ::testing::Values(ov::test::SpecialValue::none), ::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, {"ref"}))), ConvertCPULayerTest::getTestCaseName); @@ -52,9 +54,69 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_4D_Static, ConvertCPULayerTes ::testing::ValuesIn(inShapes_4D_static()), ::testing::ValuesIn(precisions()), ::testing::ValuesIn(precisions()), + ::testing::Values(ov::test::SpecialValue::none), ::testing::ValuesIn(memForm4D_static_common)), ConvertCPULayerTest::getTestCaseName); +const std::vector float_precisions = { + ov::element::f32, + ov::element::f16, + ov::element::bf16, +}; + +const std::vector f8_precisions = { + ov::element::f8e4m3, + ov::element::f8e5m2, +}; + +const std::vector specialValue = { + ov::test::SpecialValue::none, + ov::test::SpecialValue::nan, + ov::test::SpecialValue::inf, + ov::test::SpecialValue::overflow, +}; + +std::vector memForm4D_fp8 = { + CPUSpecificParams({nchw}, {nchw}, {}, expectedPrimitiveType()), + CPUSpecificParams({nhwc}, {nhwc}, {}, expectedPrimitiveType()), +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_from_fp8_Static, ConvertCPULayerTest, + ::testing::Combine( + ::testing::ValuesIn(inShapes_4D_static()), + ::testing::ValuesIn(f8_precisions), + 
::testing::ValuesIn(float_precisions), + ::testing::ValuesIn(specialValue), + ::testing::ValuesIn(memForm4D_fp8)), + ConvertCPULayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_to_fp8_Static, ConvertCPULayerTest, + ::testing::Combine( + ::testing::ValuesIn(inShapes_4D_static()), + ::testing::ValuesIn(float_precisions), + ::testing::ValuesIn(f8_precisions), + ::testing::ValuesIn(specialValue), + ::testing::ValuesIn(memForm4D_fp8)), + ConvertCPULayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_from_fp8_Dynamic, ConvertCPULayerTest, + ::testing::Combine( + ::testing::ValuesIn(inShapes_4D_dynamic()), + ::testing::ValuesIn(f8_precisions), + ::testing::ValuesIn(float_precisions), + ::testing::ValuesIn(specialValue), + ::testing::ValuesIn(memForm4D_fp8)), + ConvertCPULayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_to_fp8_Dynamic, ConvertCPULayerTest, + ::testing::Combine( + ::testing::ValuesIn(inShapes_4D_dynamic()), + ::testing::ValuesIn(float_precisions), + ::testing::ValuesIn(f8_precisions), + ::testing::ValuesIn(specialValue), + ::testing::ValuesIn(memForm4D_fp8)), + ConvertCPULayerTest::getTestCaseName); + } // namespace Conversion } // namespace test } // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/conversion.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/conversion.cpp index 9c34d6220d4b2d..ab1e06639c5a3e 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/conversion.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/conversion.cpp @@ -23,6 +23,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_blocked_Dynamic, ConvertCPULa ::testing::ValuesIn(inShapes_4D_dynamic()), ::testing::ValuesIn(precisions()), ::testing::ValuesIn(precisions()), + ::testing::Values(ov::test::SpecialValue::none), ::testing::ValuesIn(memForm4D_dynamic)), ConvertCPULayerTest::getTestCaseName); @@ -44,6 +45,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_Blocked, ConvertCPULayerTest, ::testing::ValuesIn(inShapes_4D_blocked), ::testing::ValuesIn(precisions()), ::testing::ValuesIn(precisions()), + ::testing::Values(ov::test::SpecialValue::none), ::testing::ValuesIn(filterCPUSpecificParams(memForm4D_static_blocked))), ConvertCPULayerTest::getTestCaseName); @@ -52,6 +54,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_BOOL_Static, ConvertToBoolean ::testing::ValuesIn(inShapes_4D_static()), ::testing::ValuesIn(precisions_floating_point), ::testing::Values(ov::element::boolean), + ::testing::Values(ov::test::SpecialValue::none), ::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, {}))), ConvertToBooleanCPULayerTest::getTestCaseName); @@ -60,6 +63,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_BOOL_Dynamic, ConvertToBoolea ::testing::ValuesIn(inShapes_4D_dynamic()), ::testing::ValuesIn(precisions_floating_point), ::testing::Values(ov::element::boolean), + ::testing::Values(ov::test::SpecialValue::none), ::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, {}))), ConvertToBooleanCPULayerTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/stateful_init_graph.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/stateful_init_graph.cpp new file mode 100644 index 00000000000000..9186b43d3d863e --- /dev/null +++ 
b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/stateful_init_graph.cpp
@@ -0,0 +1,314 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "utils/cpu_test_utils.hpp"
+
+using namespace ov::test;
+using namespace CPUTestUtils;
+using InitGraphStatefulModelTestParams = std::tuple,  // input shapes
+                                                    bool  // ReadValue/Assign direct pair or not
+                                                    >;
+class InitGraphStatefulModelBase : virtual public ov::test::SubgraphBaseTest,
+                                   public testing::WithParamInterface,
+                                   public CPUTestsBase {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo& obj) {
+        std::ostringstream result;
+
+        std::vector inputShapes;
+        bool directPair;
+        std::tie(inputShapes, directPair) = obj.param;
+
+        result << "IS=";
+        for (const auto& shape : inputShapes) {
+            result << ov::test::utils::partialShape2str({shape.first}) << "_";
+        }
+        result << "TS=";
+        for (const auto& shape : inputShapes) {
+            result << "(";
+            if (!shape.second.empty()) {
+                for (const auto& itr : shape.second) {
+                    result << ov::test::utils::vec2str(itr);
+                }
+            }
+            result << ")";
+        }
+        result << "_DirectAssign=" << ov::test::utils::bool2str(directPair);
+
+        return result.str();
+    }
+
+    std::vector calculate_refs() override {
+        for (const auto& param : functionRefs->get_parameters()) {
+            inferRequestRef.set_tensor(param->get_default_output(), inputs.at(matched_parameters[param]));
+        }
+        inferRequestRef.infer();
+
+        auto outputs = std::vector{};
+        for (const auto& output : functionRefs->outputs()) {
+            outputs.push_back(inferRequestRef.get_tensor(output));
+        }
+
+        return outputs;
+    }
+
+    std::vector get_plugin_outputs() override {
+        for (const auto& input : inputs) {
+            inferRequest.set_tensor(input.first, input.second);
+        }
+        inferRequest.infer();
+        auto outputs = std::vector{};
+        for (const auto& output : function->outputs()) {
+            outputs.push_back(inferRequest.get_tensor(output));
+        }
+        return outputs;
+    }
+
+    void run() override {
+        prepare();
+
+        auto&& states = inferRequest.query_state();
+        auto&& refStates = inferRequestRef.query_state();
+
+        for (size_t i = 0; i < targetStaticShapes.size(); i++) {
+            for (auto iters = 0; iters < 5; iters++) {
+                generate_inputs(targetStaticShapes[i]);
+
+                if (iters & 0x1) {
+                    states.front().reset();
+                    refStates.front().reset();
+                } else {
+                    // generate and set state tensors every even iteration
+                    using ov::test::utils::InputGenerateData;
+
+                    auto stateShape = get_state_shape(i);
+                    auto tensor = utils::create_and_fill_tensor(statePrc,
+                                                                stateShape,
+                                                                InputGenerateData{0, 1, 1, iters});
+                    states.front().set_state(tensor);
+                    refStates.front().set_state(tensor);
+                }
+
+                validate();
+            }
+        }
+    }
+
+protected:
+    virtual void check_init_graph_node() = 0;
+
+    virtual ov::Shape get_state_shape(size_t i) = 0;
+
+    void prepare() {
+        compile_model();
+
+        inferRequest = compiledModel.create_infer_request();
+        ASSERT_TRUE(inferRequest);
+
+        check_init_graph_node();
+
+        // ref
+        functionRefs = function->clone();
+
+        matched_parameters.clear();
+        const auto& ref_params = functionRefs->get_parameters();
+        const auto& params = function->get_parameters();
+        for (size_t in_idx = 0; in_idx < params.size(); ++in_idx) {
+            matched_parameters.insert({ref_params[in_idx], params[in_idx]});
+        }
+
+        auto compiledModelRef = core->compile_model(functionRefs, ov::test::utils::DEVICE_TEMPLATE);
+        inferRequestRef = compiledModelRef.create_infer_request();
+    }
+
+    std::vector
inputShapes; + const ov::element::Type netPrc = ElementType::f32; + ov::InferRequest inferRequestRef; + ov::element::Type statePrc; +}; + +// ReadValue Assign direct pair +// +// input_1 input_2 +// | | +// Add_1 / +// \ / +// MatMul +// | +// input_0 ReadValue .......... +// \ / \ . +// Add_0 Assign ........ +// | +// Result + +class InitGraphStatefulModel : public InitGraphStatefulModelBase { +public: + void SetUp() override { + targetDevice = utils::DEVICE_CPU; + + bool directPair; + std::tie(inputShapes, directPair) = this->GetParam(); + + init_input_shapes(inputShapes); + ov::ParameterVector input_params; + for (auto&& shape : inputDynamicShapes) { + input_params.push_back(std::make_shared(netPrc, shape)); + } + + input_params[0]->set_friendly_name("input_0"); + input_params[1]->set_friendly_name("input_1"); + input_params[2]->set_friendly_name("input_2"); + + // init_graph + auto add_1 = + std::make_shared(input_params[1], ov::op::v0::Constant::create(netPrc, {1}, {1.0f})); + add_1->set_friendly_name("init_graph/add_1"); + auto mm_0 = std::make_shared(add_1, input_params[2]); + mm_0->set_friendly_name("init_graph/mm_0"); + + const std::string variable_name("var_direct_pair"); + statePrc = netPrc; + auto variable = std::make_shared( + ov::op::util::VariableInfo{{inputDynamicShapes[1][0], inputDynamicShapes[2][1]}, statePrc, variable_name}); + + auto read = std::make_shared(mm_0, variable); + std::shared_ptr add_0 = std::make_shared(input_params[0], read); + add_0->set_friendly_name("add_0"); + auto assign = std::make_shared(directPair ? read : add_0, variable); + auto res = std::make_shared(add_0); + function = std::make_shared(ov::ResultVector({res}), ov::SinkVector({assign}), input_params); + } + + void check_init_graph_node() override { + // Node with friendly name "init_graph/add_1" and init_graph/mm_0 should be moved into subgraph. + CheckNumberOfNodesWithType(compiledModel, "Add", 0); + CheckNumberOfNodesWithType(compiledModel, "MatMul", 0); + } + + ov::Shape get_state_shape(size_t i) override { + return ov::Shape({inputShapes[1].second[i][0], inputShapes[2].second[i][1]}); + } +}; + +TEST_P(InitGraphStatefulModel, CompareWithRefs) { + run(); +} + +// ReadValueWithSubgraph have different precision. +// +// input[fp32] +// | +// Convert[fp32->fp16] +// | +// ReadValue .......... +// / \ . +// Add Assign ....... +// | +// Result +class InitGraphStatefulDiffPrimitiveModel : public InitGraphStatefulModelBase { +public: + void SetUp() override { + targetDevice = utils::DEVICE_CPU; + + configuration.insert({"SNIPPETS_MODE", "DISABLE"}); + + bool directPair; + std::tie(inputShapes, directPair) = this->GetParam(); + + init_input_shapes(inputShapes); + ov::ParameterVector input_params; + for (auto&& shape : inputDynamicShapes) { + input_params.push_back(std::make_shared(netPrc, shape)); + } + + input_params[0]->set_friendly_name("input"); + + // init_graph + auto convert = std::make_shared(input_params[0], ov::element::f16); + convert->set_friendly_name("init_graph/convert"); + + const std::string variable_name("var_diff_precison"); + statePrc = ov::element::f16; + auto variable = std::make_shared( + ov::op::util::VariableInfo{{inputDynamicShapes[0]}, statePrc, variable_name}); + + auto readvalue = std::make_shared(convert, variable); + + std::shared_ptr add = + std::make_shared(readvalue, ov::op::v0::Constant::create(ov::element::f16, {1}, {1.0f})); + + auto assign = std::make_shared(directPair ? 
readvalue : add, variable);
+
+        auto res = std::make_shared(add);
+
+        function = std::make_shared(ov::ResultVector({res}), ov::SinkVector({assign}), input_params);
+    }
+
+    void check_init_graph_node() override {
+        CheckNumberOfNodesWithType(compiledModel, "Convert", 1);
+    }
+
+    ov::Shape get_state_shape(size_t i) override {
+        return inputShapes[0].second[i];
+    }
+};
+
+TEST_P(InitGraphStatefulDiffPrimitiveModel, CompareWithRefs) {
+    run();
+}
+
+namespace {
+const std::vector> inputShapes = {
+    {
+        // Dynamic shape.
+        {{1, -1}, {{1, 2}, {1, 2}, {1, 1}}},
+        {{2, -1}, {{2, 3}, {2, 10}, {2, 1}}},
+        {{-1, 2}, {{3, 2}, {10, 2}, {1, 2}}},
+    },
+    {
+        // Static shape.
+        {{1, 1}, {{1, 1}}},
+        {{4, 2}, {{4, 2}}},
+        {{2, 10}, {{2, 10}}},
+    }
+};
+
+const std::vector readValueAssignDirectPair = {true, false};
+
+const auto testParams_smoke = ::testing::Combine(
+    ::testing::ValuesIn(inputShapes),
+    ::testing::ValuesIn(readValueAssignDirectPair));
+
+INSTANTIATE_TEST_SUITE_P(smoke_StatefulInitGraph,
+                         InitGraphStatefulModel,
+                         testParams_smoke,
+                         InitGraphStatefulModel::getTestCaseName);
+
+
+const std::vector> inputShapesDiffPrecision = {
+    {
+        // Dynamic shape.
+        {{1, -1}, {{1, 10}, {1, 1}}},
+    },
+    {
+        // Static shape.
+        {{1, 1}, {{1, 1}}},
+    }
+};
+
+const auto testParamsDiffPrecision_smoke = ::testing::Combine(
+    ::testing::ValuesIn(inputShapesDiffPrecision),
+    ::testing::ValuesIn(readValueAssignDirectPair));
+
+INSTANTIATE_TEST_SUITE_P(smoke_StatefulInitGraph,
+                         InitGraphStatefulDiffPrimitiveModel,
+                         testParamsDiffPrecision_smoke,
+                         InitGraphStatefulDiffPrimitiveModel::getTestCaseName);
+
+}  // namespace
+
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/bf16_convert_saturation.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/bf16_convert_saturation.cpp
new file mode 100644
index 00000000000000..96c08eeffed15a
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/bf16_convert_saturation.cpp
@@ -0,0 +1,114 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+#include "utils/fusing_test_utils.hpp"
+
+using namespace CPUTestUtils;
+namespace ov {
+namespace test {
+/*
+    This test aims to cover the Eltwise node's BF16 output precision conversion logic in "saturation" mode. In this
+    test, we have a Select node with a condition input of boolean type and then/else inputs of f32 type (as constant
+    nodes with bf16 overflow data). The Select node is followed by a convolution node to ensure that it is converted
+    to bf16 precision.
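+    With saturation enabled, the out-of-range constants clamp to the largest finite bf16 magnitude, so the outputs
+    stay finite and match the reference; without it, the conversion would produce -Inf/+Inf values.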
+*/ +using selectParams = std::tuple; +class BF16ConvertSaturation : public testing::WithParamInterface, + virtual public SubgraphBaseTest, + public CpuTestWithFusing { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InputShape shapes; + ElementType precision; + std::tie(shapes, precision) = obj.param; + + std::ostringstream result; + result << "Condition_prc_" << ElementType::boolean << "_Then_Else_prc_" << precision << "_"; + result << "IS=(" << shapes.first << ")_TS=("; + for (const auto& item : shapes.second) { + result << ov::test::utils::vec2str(item) << "_"; + } + result << "PluginConf_inference_precision=bf16"; + + return result.str(); + } + +protected: + void SetUp() override { + abs_threshold = 0; + targetDevice = ov::test::utils::DEVICE_CPU; + InputShape shapes; + ElementType precision; + std::tie(shapes, precision) = this->GetParam(); + init_input_shapes({shapes}); + std::tie(inFmts, outFmts, priority, selectedType) = emptyCPUSpec; + selectedType = makeSelectedTypeStr(getPrimitiveType(), ov::element::i8); + ov::element::TypeVector types{ov::element::boolean, precision, precision}; + ov::ParameterVector parameters; + auto param = std::make_shared(ov::element::boolean, inputDynamicShapes[0]); + parameters.push_back(param); + + ov::test::utils::InputGenerateData in_data; + in_data.start_from = -3.40282e+38; + in_data.range = 1; + in_data.resolution = 1; + auto thenTensor = ov::test::utils::create_and_fill_tensor(precision, ov::Shape{1}, in_data); + + in_data.start_from = 3.40282e+38; + in_data.range = 10; + in_data.resolution = 2; + auto elseTensor = ov::test::utils::create_and_fill_tensor(precision, ov::Shape{2, 1, 32, 32}, in_data); + + auto select = std::make_shared(parameters[0], + std::make_shared(thenTensor), + std::make_shared(elseTensor), + ov::op::AutoBroadcastType::NUMPY); + + auto conv_filter_shape = ov::Shape{1, 1, 3, 3}; + auto conv_filter = ov::op::v0::Constant::create(ElementType::f32, conv_filter_shape, {1}); + auto strides = ov::Strides{1, 1}; + auto pads_begin = ov::CoordinateDiff{0, 0}; + auto pads_end = ov::CoordinateDiff{0, 0}; + auto dilations = ov::Strides{1, 1}; + auto conv = + std::make_shared(select, conv_filter, strides, pads_begin, pads_end, dilations); + + function = makeNgraphFunction(ElementType::f32, parameters, conv, "Eltwise"); + configuration.insert({ov::hint::inference_precision(ov::element::bf16)}); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& modelInputs = function->inputs(); + ov::test::utils::InputGenerateData in_data; + in_data.start_from = -1; + in_data.range = 3; + in_data.resolution = 2; + auto condTensor = ov::test::utils::create_and_fill_tensor(modelInputs[0].get_element_type(), + targetInputStaticShapes[0], + in_data); + + inputs.insert({modelInputs[0].get_node_shared_ptr(), condTensor}); + } +}; + +TEST_P(BF16ConvertSaturation, CompareWithRefs) { + run(); +} + +const std::vector inShapes = { + // Condition + {{-1, -1, -1, -1}, {{2, 1, 32, 32}}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_BF16ConvertSaturationTest, + BF16ConvertSaturation, + ::testing::Combine(::testing::ValuesIn(inShapes), ::testing::Values(ElementType::f32)), + BF16ConvertSaturation::getTestCaseName); + +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp index 9ff4d0b989fefa..903b8c083b1a1f 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp @@ -32,6 +32,17 @@ const std::vector types = { ov::element::f64, }; +const std::vector floatTypes = { + ov::element::f32, + ov::element::f16, + ov::element::bf16, +}; + +const std::vector f8Types = { + ov::element::f8e4m3, + ov::element::f8e5m2, +}; + INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest, ConversionLayerTest, ::testing::Combine(::testing::ValuesIn(conversionOpTypes), @@ -49,4 +60,23 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConversionToBooleanLayerTest, ::testing::Values(ov::element::boolean), ::testing::Values(ov::test::utils::DEVICE_CPU)), ConversionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ConversionToF8LayerTest, + ConversionLayerTest, + ::testing::Combine(::testing::Values(conversionOpTypes[0]), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)), + ::testing::ValuesIn(floatTypes), + ::testing::ValuesIn(f8Types), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ConversionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ConversionFromF8LayerTest, + ConversionLayerTest, + ::testing::Combine(::testing::Values(conversionOpTypes[0]), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)), + ::testing::ValuesIn(f8Types), + ::testing::ValuesIn(floatTypes), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ConversionLayerTest::getTestCaseName); + } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/fake_convert.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/fake_convert.cpp new file mode 100644 index 00000000000000..a2f17ea72cbb3e --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/fake_convert.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_op_tests/fake_convert.hpp" + +namespace { +using ov::test::FakeConvertLayerTest; + +const std::vector> shapes = {{{2, 3, 4, 5}}}; + +const std::vector data_precisions = {ov::element::f32, ov::element::f16, ov::element::bf16}; + +const std::vector destination_precisions = {ov::element::f8e4m3, ov::element::f8e5m2}; + +const std::vector default_shift = {true, false}; + +const auto simple_fake_convert_params = + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)), + ::testing::Values(ov::Shape{1}), + ::testing::Values(ov::Shape{1}), + ::testing::ValuesIn(data_precisions), + ::testing::ValuesIn(destination_precisions), + ::testing::ValuesIn(default_shift), + ::testing::Values(ov::test::utils::DEVICE_CPU)); + +const auto broadcast_fake_convert_params = + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)), + ::testing::Values(ov::Shape{2, 3, 1, 1}), + ::testing::Values(ov::Shape{2, 3, 1, 1}), + ::testing::ValuesIn(data_precisions), + ::testing::ValuesIn(destination_precisions), + ::testing::ValuesIn(default_shift), + ::testing::Values(ov::test::utils::DEVICE_CPU)); + +const auto elementwise_fake_convert_params = + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes)), + 
::testing::Values(ov::Shape{2, 3, 4, 5}), + ::testing::Values(ov::Shape{2, 3, 4, 5}), + ::testing::ValuesIn(data_precisions), + ::testing::ValuesIn(destination_precisions), + ::testing::ValuesIn(default_shift), + ::testing::Values(ov::test::utils::DEVICE_CPU)); + +INSTANTIATE_TEST_SUITE_P(smoke_FakeConvert_simple, + FakeConvertLayerTest, + simple_fake_convert_params, + FakeConvertLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_FakeConvert_broadcast, + FakeConvertLayerTest, + broadcast_fake_convert_params, + FakeConvertLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_FakeConvert_elementwise, + FakeConvertLayerTest, + elementwise_fake_convert_params, + FakeConvertLayerTest::getTestCaseName); +} // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 7af707df602bfc..4c34b3fd2506ac 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -173,6 +173,8 @@ std::vector disabledTestPatterns() { R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=21_.*_sort=value_modelType=f16_trgDev=CPU.*)", // Issue: 121812 R"(.*ConvertCPULayerTest.*outFmts=(nhwc|nChw8c|nChw16c).*)", + // Issue: MFDNN-12917. The oneDNN emitter of conversion from fp32 to fp8 has rounding issue. + R"(.*ConvertCPULayerTest.*(\[1.1.1080.1920\]|\(2.17.5.4\))_.*_inputPRC=f32_targetPRC=f8e4m3_.*)", // Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling. R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)", // Issue: 123815 (Tests are sensintive to available thread count on testing machines) @@ -529,6 +531,7 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)"); retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*)"); retVector.emplace_back(R"(.*ConcatSDPTest.*f16.*)"); + retVector.emplace_back(R"(.*ConvertCPULayerTest.*f16.*)"); } #elif defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) if (!ov::intel_cpu::hasHardwareSupport(ov::element::f16)) { @@ -536,6 +539,7 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)"); retVector.emplace_back(R"(.*Prc=f16.*)"); retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*HasShapeOf=1.*)"); + retVector.emplace_back(R"(.*ConvertCPULayerTest.*f16.*)"); } else { // Issue 117407 retVector.emplace_back( diff --git a/src/plugins/intel_cpu/tests/unit/transformations/readvalue_subgraph.cpp b/src/plugins/intel_cpu/tests/unit/transformations/readvalue_subgraph.cpp new file mode 100644 index 00000000000000..3656130b579edd --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/transformations/readvalue_subgraph.cpp @@ -0,0 +1,232 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/read_value.hpp" +#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp" + +using namespace testing; +/**************************************************************** + * Pattern 1 (From whisper decoder): + * input input + * | | + * MatMul ReadValueWithSubgraph (MatMul) + * | -> | \ + * ReadValue Result Assign + * | \ + * 
Result Assign + ****************************************************************/ +static std::shared_ptr constructRVWithSubGraph( + std::shared_ptr input, + const ov::element::Type& type, + std::shared_ptr variable) { + auto mm_weights = std::make_shared(type, ov::Shape{2, 2}, std::vector{1, 2, 3, 4}); + + auto func_input = + std::make_shared(input->get_element_type(), input->get_output_partial_shape(0)); + + auto matmul = std::make_shared(func_input, mm_weights, false, false); + + auto func_output = std::make_shared(matmul); + + auto func = std::make_shared(ov::NodeVector({func_output}), + ov::ParameterVector{func_input}, + "state_init_submodel"); + + auto readvalue = std::make_shared(variable, func); + readvalue->set_input(input->output(0), func_input); + readvalue->set_output(func_output); + readvalue->validate_and_infer_types(); + + return readvalue; +} + +TEST(TransformationTests, ReadValueWithSubgraph_1) { + std::shared_ptr model(nullptr), model_ref(nullptr); + { + const ov::PartialShape shape{1, 1, 2}; + const ov::element::Type type = ov::element::f32; + std::shared_ptr variable = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape{1, 1, 2}, type, "var_id"}); + + { + auto input = std::make_shared(type, shape); + + auto mm_weights = + std::make_shared(type, ov::Shape{2, 2}, std::vector{1, 2, 3, 4}); + + auto matmul = std::make_shared(input, mm_weights, false, false); + + auto readvalue = std::make_shared(matmul, variable); + + auto assign = std::make_shared(readvalue, variable); + + auto result = std::make_shared(readvalue); + model = std::make_shared(ov::ResultVector{result}, + ov::SinkVector{assign}, + ov::ParameterVector{input}); + + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(model); + } + { + auto input = std::make_shared(type, shape); + + auto readvalue = constructRVWithSubGraph(input, type, variable); + + auto assign = std::make_shared(readvalue, variable); + + auto result = std::make_shared(readvalue); + + model_ref = std::make_shared(ov::ResultVector{result}, + ov::SinkVector{assign}, + ov::ParameterVector{input}); + } + auto res = compare_functions(model, model_ref, 0, 0, 0, 0, 0, 0); + ASSERT_TRUE(res.first) << res.second; + } +} + +/*************************************************************************************************** + * Pattern 2 (Complex pattern): + * input input + * | | + * Convert Convert + * / | \ / | \ + * / | \ / Add2 \ + * Add1 Add2 \ | | \ | + * | | \ | ---> | | Add3 + * \ | Add3 | | / \ + * \ | / \ ReadValueWithSubgraph Result2 Subgraph(Add1, Add4, Add5) + * \ Add4 \ / \ + * \ | \ Result1 Assign + * Add5 Result2 + * | + * ReadValue + * / \ + * Result1 Assign + * + ***************************************************************************************************/ + +static std::shared_ptr create_const_node(ov::Shape shape) { + return std::make_shared(ov::element::i32, shape, std::vector{1}); +} + +static std::shared_ptr constructRVWithSubGraph2( + ov::NodeVector inputs, + const ov::element::Type& type, + std::shared_ptr variable) { + ov::ParameterVector func_inputs; + for (auto input : inputs) { + auto func_input = + std::make_shared(input->get_element_type(), input->get_output_partial_shape(0)); + func_inputs.push_back(func_input); + } + + auto add1 = std::make_shared(func_inputs[0], create_const_node(ov::Shape{4})); + + auto add4 = std::make_shared(func_inputs[1], func_inputs[2]); + + auto add5 = std::make_shared(add1, add4); + + auto func_output = std::make_shared(add5); + + auto func = 
std::make_shared(ov::NodeVector({func_output}), func_inputs, "state_init_submodel"); + + auto readvalue = std::make_shared(variable, func); + for (size_t i = 0; i < inputs.size(); i++) { + readvalue->set_input(inputs[i]->output(0), func_inputs[i]); + } + readvalue->set_output(func_output); + readvalue->validate_and_infer_types(); + + return readvalue; +} + +TEST(TransformationTests, ReadValueWithSubgraph_2) { + std::shared_ptr model(nullptr), model_ref(nullptr); + { + const ov::PartialShape shape{1, 2, 4}; + const ov::element::Type in_type = ov::element::f32; + const ov::element::Type out_type = ov::element::i32; + + std::shared_ptr variable = + std::make_shared(ov::op::util::VariableInfo{shape, out_type, "var_id"}); + + { + auto input = std::make_shared(in_type, shape); + input->set_friendly_name("input"); + + auto convert = std::make_shared(input, out_type); + convert->set_friendly_name("convert"); + + auto add1 = std::make_shared(convert, create_const_node(ov::Shape{4})); + add1->set_friendly_name("add1"); + + auto add2 = std::make_shared(convert, create_const_node(ov::Shape{4})); + add2->set_friendly_name("add2"); + + auto add3 = std::make_shared(add2, convert); + add3->set_friendly_name("add3"); + + auto add4 = std::make_shared(add2, add3); + add4->set_friendly_name("add4"); + + auto add5 = std::make_shared(add1, add4); + add5->set_friendly_name("add5"); + + auto readvalue = std::make_shared(add5, variable); + readvalue->set_friendly_name("readvalue"); + + auto assign = std::make_shared(readvalue, variable); + assign->set_friendly_name("assign"); + + auto result1 = std::make_shared(readvalue); + result1->set_friendly_name("result1"); + + auto result2 = std::make_shared(add3); + result2->set_friendly_name("result2"); + + model = std::make_shared(ov::ResultVector{result1, result2}, + ov::SinkVector{assign}, + ov::ParameterVector{input}); + + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(model); + } + { + auto input = std::make_shared(in_type, shape); + + auto convert = std::make_shared(input, out_type); + + auto add2 = std::make_shared(convert, create_const_node(ov::Shape{4})); + + auto add3 = std::make_shared(add2, convert); + + auto readvalue = constructRVWithSubGraph2({convert, add2, add3}, out_type, variable); + + auto assign = std::make_shared(readvalue, variable); + + auto result1 = std::make_shared(readvalue); + + auto result2 = std::make_shared(add3); + + model_ref = std::make_shared(ov::ResultVector{result1, result2}, + ov::SinkVector{assign}, + ov::ParameterVector{input}); + } + auto res = compare_functions(model, model_ref, 0, 0, 0, 0, 0, 0); + ASSERT_TRUE(res.first) << res.second; + } +} \ No newline at end of file diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index c7524f1880157d..0950614897ab43 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -267,6 +267,7 @@ REGISTER_FACTORY(v13, ScaledDotProductAttention); REGISTER_FACTORY(v13, BitwiseAnd); REGISTER_FACTORY(v13, BitwiseOr); REGISTER_FACTORY(v13, BitwiseXor); +REGISTER_FACTORY(v13, FakeConvert); // ------------------------------ Supported v15 ops ----------------------------- // REGISTER_FACTORY(v15, ROIAlignRotated); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp new file mode 100644 
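The two transformation tests above follow the standard build model, run pass, compare pattern. A self-contained sketch of that flow, with ov::pass::ConstantFolding standing in for the plugin-internal MoveReadValueInputsToSubgraph pass (which is not available outside the CPU plugin):

```cpp
#include <openvino/core/model.hpp>
#include <openvino/op/add.hpp>
#include <openvino/op/constant.hpp>
#include <openvino/op/result.hpp>
#include <openvino/pass/constant_folding.hpp>
#include <openvino/pass/manager.hpp>
#include <iostream>

int main() {
    // Build a tiny model whose Add can be folded away.
    auto c0 = ov::op::v0::Constant::create(ov::element::f32, {1}, {2.0f});
    auto c1 = ov::op::v0::Constant::create(ov::element::f32, {1}, {3.0f});
    auto add = std::make_shared<ov::op::v1::Add>(c0, c1);
    auto res = std::make_shared<ov::op::v0::Result>(add);
    auto model = std::make_shared<ov::Model>(ov::ResultVector{res}, ov::ParameterVector{});

    // Run the pass under test through a pass manager, exactly as the tests above do.
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::ConstantFolding>();
    manager.run_passes(model);

    // After folding, the Add collapses into a single Constant; the tests above make the
    // same kind of structural check via compare_functions() against a hand-built reference.
    std::cout << "ops after folding: " << model->get_ops().size() << "\n";
}
```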
index 00000000000000..c16af0be51abda --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp @@ -0,0 +1,68 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "primitive.hpp" +#include + +namespace cldnn { + +/// @brief FakeConvert performs element-wise quantization of input values +/// into a set of values corresponding to a target low-precision type. +struct fake_convert : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(fake_convert) + + fake_convert() : primitive_base("", {}) {} + + /// @brief Constructs fake_convert primitive. + /// @param id This primitive id. + /// @param input Input primitive id. + /// @param scale Scale primitive id. + /// @param shift Shift primitive id. + /// @param destination_type The low precision type to be emulated. + fake_convert(const primitive_id& id, + const input_info& input, + const input_info& scale, + const input_info& shift, + ov::element::Type destination_type = ov::element::Type_t::f8e4m3) + : primitive_base(id, {input, scale, shift}, 1), destination_type(destination_type) {} + + /// @brief Constructs fake_convert primitive. + /// @param id This primitive id. + /// @param input Input primitive id. + /// @param scale Scale primitive id. + /// @param shift Shift primitive id. + /// @param destination_type The low precision type to be emulated. + fake_convert(const primitive_id& id, + const input_info& input, + const input_info& scale, + ov::element::Type destination_type = ov::element::Type_t::f8e4m3) + : primitive_base(id, {input, scale}, 1), destination_type(destination_type) {} + + ov::element::Type destination_type; + + size_t hash() const override { + size_t seed = primitive::hash(); + seed = hash_combine(seed, destination_type.get_type_name()); + return seed; + } + + bool operator==(const primitive& rhs) const override { + if (!compare_common_params(rhs)) + return false; + auto rhs_casted = downcast(rhs); + return (destination_type == rhs_casted.destination_type); + } + + void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); + ob << make_data(&destination_type, sizeof(destination_type)); + } + + void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); + ib >> make_data(&destination_type, sizeof(destination_type)); + } +}; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp index e84311a9cfb592..c83b1127e2d44c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/implementation_desc.hpp @@ -19,6 +19,7 @@ enum class impl_types : uint8_t { ocl = 1 << 2, onednn = 1 << 3, sycl = 1 << 4, + cm = 1 << 5, any = 0xFF, }; @@ -43,6 +44,7 @@ inline std::ostream& operator<<(std::ostream& out, const impl_types& impl_type) case impl_types::common: out << "common"; break; case impl_types::ocl: out << "ocl"; break; case impl_types::onednn: out << "onednn"; break; + case impl_types::cm: out << "cm"; break; case impl_types::any: out << "any"; break; default: out << "unknown"; break; } @@ -61,6 +63,8 @@ inline std::istream& operator>>(std::istream& is, impl_types& impl_type) { impl_type = impl_types::ocl; } else if (str == "onednn") { impl_type = impl_types::onednn; + } else if (str == "cm") { + impl_type = impl_types::cm; } else if (str == "any") { impl_type = impl_types::any; } else { diff --git 
a/src/plugins/intel_gpu/include/intel_gpu/primitives/paged_attention.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/paged_attention.hpp index f87f608597a6bb..2638f2ad60cf26 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/paged_attention.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/paged_attention.hpp @@ -24,6 +24,10 @@ struct paged_attention : public primitive_base { OPENVINO_ASSERT(inputs.size() == 13, "[GPU] Unexpected inputs number for PagedAttention primitive: ", inputs.size()); } + bool has_scores_output() const { + return num_outputs == 2; + } + bool operator==(const primitive& rhs) const override { return compare_common_params(rhs); } diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp index 09dfcf68f05725..9a26768d0fc068 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp @@ -16,6 +16,11 @@ struct work_group_sizes { std::vector local; }; +enum class kernel_language { + OCLC, + CM, +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Scalar //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -122,8 +127,10 @@ struct kernel_string { std::string entry_point; bool batch_compilation; bool has_microkernels; + kernel_language language; - kernel_string() : str(""), jit(""), undefs(""), options(""), entry_point(""), batch_compilation(false), has_microkernels(false) {} + kernel_string() : str(""), jit(""), undefs(""), options(""), entry_point(""), + batch_compilation(false), has_microkernels(false), language(kernel_language::OCLC) {} std::string get_str() const { return str + jit + undefs + options + entry_point; } size_t get_hash() const { return std::hash()(get_str()); } diff --git a/src/plugins/intel_gpu/src/graph/fake_convert.cpp b/src/plugins/intel_gpu/src/graph/fake_convert.cpp new file mode 100644 index 00000000000000..b201378d52cc8d --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/fake_convert.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "fake_convert_inst.h" +#include "fake_convert_shape_inference.hpp" + +#include "primitive_type_base.h" +#include "intel_gpu/runtime/error_handler.hpp" +#include "json_object.h" +#include + +namespace cldnn { +GPU_DEFINE_PRIMITIVE_TYPE_ID(fake_convert) + +layout fake_convert_inst::calc_output_layout(fake_convert_node const& node, kernel_impl_params const& impl_param) { + return calc_output_layouts(node, impl_param)[0]; +} + +template +std::vector fake_convert_inst::calc_output_layouts(fake_convert_node const& node, kernel_impl_params const& impl_param) { + const auto& input_layout = impl_param.get_input_layout(0); + auto output_type = ov::element::Type(input_layout.data_type); + + OPENVINO_ASSERT(ov::element::Type::merge(output_type, output_type, ov::element::Type(impl_param.get_input_layout(1).data_type)), + "Mixed input types are not supported."); + + if (impl_param.input_layouts.size() == 3) { + OPENVINO_ASSERT(ov::element::Type::merge(output_type, output_type, ov::element::Type(impl_param.get_input_layout(2).data_type)), + "Mixed input types are not supported."); + } + + switch (output_type) { + case ov::element::bf16: + case ov::element::f16: + case ov::element::f32: + break; + default: + 
OPENVINO_THROW("The output data type should be a bf16, f16, f32 but got: ", output_type); + } + + return { layout{input_layout.get_partial_shape(), output_type, input_layout.format} }; +} + +template std::vector fake_convert_inst::calc_output_layouts(fake_convert_node const& node, const kernel_impl_params& impl_param); + +std::string fake_convert_inst::to_string(fake_convert_node const& node) { + auto desc = node.get_primitive(); + auto node_info = node.desc_to_json(); + auto& input = node.input(); + auto& scale = node.scale(); + + std::stringstream primitive_description; + + json_composite fake_convert_info; + fake_convert_info.add("input id", input.id()); + fake_convert_info.add("scale id", scale.id()); + if (node.has_shift()) { + fake_convert_info.add("shift id", node.shift().id()); + } + fake_convert_info.add("destination_type", node.get_destination_type().get_type_name()); + + node_info->add("fake_convert info", fake_convert_info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} + +fake_convert_inst::typed_primitive_inst(network& network, fake_convert_node const& node) + : parent(network, node) {} + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp index 9539117bcf4b18..a40c7dfebb9de6 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "broadcast_inst.h" #include "shape_of_inst.h" #include "read_value_inst.h" #include "reshape_inst.h" @@ -86,6 +87,13 @@ bool mark_shape_of_subgraphs::can_mark_node(const program_node& node) { return false; } + // skip mark_node for broadcast node if dependency nodes are data and shape_of + auto& dependencies = node.get_dependencies(); + if (node.is_type() && dependencies.size() == 2) { + if (dependencies[0].first->is_type() && dependencies[1].first->is_type()) + return false; + } + return true; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index 1e5f943600fc05..ac7810c6e9154c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -295,6 +295,12 @@ void remove_redundant_reorders::run(program& p) { auto o_layout = r_node.get_output_layout(); const auto& i_layout = r_node.get_input_layout(0); + auto is_r_node_rank_changed = r_node.get_output_layout().get_rank() != r_node.get_dependency(0).get_output_layout().get_rank(); + if (is_r_node_rank_changed && + ((!update_implementations && r_node.get_dependency(0).is_type()) || + (r_node.get_dependency(0).is_type() && r_node.get_dependency(0).can_be_optimized()))) + continue; + // Optimize reorder b_fs_yx_fsv16 -> bfyx when spatials are equal to 1. In this case we can reinterpret buffer, // but pads need to be handled correctly. 
if (i_layout.format == format::b_fs_yx_fsv16 && o_layout.format == format::bfyx && !r_node.is_output() && diff --git a/src/plugins/intel_gpu/src/graph/impls/cm/impl_example.cpp b/src/plugins/intel_gpu/src/graph/impls/cm/impl_example.cpp new file mode 100644 index 00000000000000..c4ec8da18c7136 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/cm/impl_example.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "impls/cm/impl_example.hpp" + +#include "fully_connected/cm/fully_connected_cm_kernel_selector.h" +#include "fully_connected/fully_connected_params.h" +#include "fully_connected_inst.h" +#include "impls/ocl/primitive_base.hpp" + +namespace cldnn { +namespace cm { + +struct example_impl : ocl::typed_primitive_impl_ocl { + using parent = typed_primitive_impl_ocl; + using parent::parent; + using kernel_selector_t = kernel_selector::fully_connected_cm_kernel_selector; + using kernel_params_t = kernel_selector::fully_connected_params; + + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cm::example_impl) + + example_impl() = default; + + std::unique_ptr clone() const override { + return make_deep_copy(*this); + } + +protected: + kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { + kernel_arguments_data args = parent::get_arguments(instance); + const auto& desc = instance.get_typed_desc(); + + args.weights = instance.weights_memory(); + args.bias = instance.bias_term() ? instance.bias_memory() : nullptr; + + args.inputs = {instance.input_memory_ptr(0)}; + size_t in_id = instance.bias_term() ? 3 : 2; + if (!desc->decompression_scale.empty()) + args.inputs.push_back(instance.dep_memory_ptr(in_id++)); + + if (!desc->decompression_zero_point.empty()) + args.inputs.push_back(instance.dep_memory_ptr(in_id)); + + return args; + } + +public: + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { + auto params = get_weights_bias_default_params(impl_param, + false, + is_shape_agnostic); + return params; + } +}; +std::unique_ptr ExampleImplementationManager::create_impl(const program_node& node, + const kernel_impl_params& params) const { + OPENVINO_ASSERT(node.is_type()); + return ocl::typed_primitive_impl_ocl::create( + static_cast(node), + params); +} +} // namespace cm +} // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cm::example_impl) diff --git a/src/plugins/intel_gpu/src/graph/impls/cm/impl_example.hpp b/src/plugins/intel_gpu/src/graph/impls/cm/impl_example.hpp new file mode 100644 index 00000000000000..0208da12a2f378 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/cm/impl_example.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "fully_connected_inst.h" +#include "impls/registry/implementation_manager.hpp" + +namespace cldnn { +namespace cm { + +struct ExampleImplementationManager : public ImplementationManager { + OV_GPU_PRIMITIVE_IMPL("cm::example") + ExampleImplementationManager(shape_types shape_type, ValidateFunc vf = nullptr) + : ImplementationManager(impl_types::cm, shape_type, vf) {} + + std::unique_ptr create_impl(const program_node& node, + const kernel_impl_params& params) const override; + + bool validate_impl(const program_node& node) const override { + assert(node.is_type()); + + auto &engine = node.get_program().get_engine(); + auto &config = node.get_program().get_config(); + if (!check_cm_jit_support(engine, 
config)) { + return false; + } + + // Example impl should not be chosen unless forced + return false; + } +}; + +} // namespace cm +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp new file mode 100644 index 00000000000000..a5f94741c40bf5 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp @@ -0,0 +1,131 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "impls/cpu/cpu_impl_helpers.hpp" +#include "register.hpp" +#include "fake_convert_inst.h" +#include "impls/registry/implementation_map.hpp" + +#include "openvino/op/fake_convert.hpp" + +namespace cldnn { +namespace cpu { + +struct fake_convert_impl : public typed_primitive_impl { + using parent = typed_primitive_impl; + using parent::parent; + + ov::element::Type destination_type; + + std::shared_ptr op; + + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cpu::fake_convert_impl) + + std::unique_ptr clone() const override { + return make_unique(*this); + } + + fake_convert_impl() : parent("fake_convert_cpu_impl") {} + + explicit fake_convert_impl(const fake_convert_node& outer) { + set_node_params(outer); + } + + void set_node_params(const program_node& arg) override { + OPENVINO_ASSERT(arg.is_type(), "[GPU] Incorrect program_node type"); + const auto& node = arg.as(); + destination_type = node.get_destination_type(); + } + + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << make_data(&destination_type, sizeof(destination_type)); + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> make_data(&destination_type, sizeof(destination_type)); + } + + event::ptr execute_impl(const std::vector& events, fake_convert_inst& instance) override { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "fake_convert::execute_impl"); + auto& stream = instance.get_network().get_stream(); + + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); + + if (!pass_through_events) { + stream.wait_for_events(events); + } + + auto params = instance.get_impl_params(); + + ov::TensorVector input_host_tensors; + ov::TensorVector output_host_tensors; + + if (!op) { + op = std::make_shared(); + op->set_destination_type(destination_type); + } + + std::vector input_mem_ptrs; + for (size_t i = 0; i < instance.dependencies().size(); i++) + input_mem_ptrs.push_back(instance.dep_memory_ptr(i)); + + auto output_mem_ptr = instance.output_memory_ptr(); + + cldnn::mem_lock output_lock(output_mem_ptr, stream); + + for (size_t i = 0; i < input_mem_ptrs.size(); i++) + input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); + + output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data())); + + OPENVINO_ASSERT(op->evaluate(output_host_tensors, input_host_tensors), + "[GPU] Couldn't execute fake_convert primitive with id ", instance.id()); + + if (pass_through_events) { + return stream.group_events(events); + } + + return make_output_event(stream, instance.is_output()); + } + + void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} + + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} + +public: + static std::unique_ptr create(const fake_convert_node& arg, const kernel_impl_params& impl_param) { + return make_unique(); + } 
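The execute_impl above delegates the math to the core op's reference evaluate(), wrapping plugin memory as host tensors. Outside the plugin, the same pattern looks roughly like this (shapes and values are illustrative; the saturation result follows my reading of the f8e4m3 reference, whose finite range is about +-448):

```cpp
#include <openvino/op/fake_convert.hpp>
#include <openvino/runtime/tensor.hpp>
#include <iostream>
#include <memory>
#include <vector>

int main() {
    // Same construction as the CPU impl: default op + destination type.
    auto op = std::make_shared<ov::op::v13::FakeConvert>();
    op->set_destination_type(ov::element::f8e4m3);

    std::vector<float> data{0.5f, 1.0f, 600.0f, -600.0f};
    std::vector<float> scale{1.0f};
    std::vector<float> out(data.size());

    // Wrap raw host buffers as tensors, mirroring make_tensor() in execute_impl.
    ov::TensorVector inputs{ov::Tensor(ov::element::f32, ov::Shape{4}, data.data()),
                            ov::Tensor(ov::element::f32, ov::Shape{1}, scale.data())};
    ov::TensorVector outputs{ov::Tensor(ov::element::f32, ov::Shape{4}, out.data())};

    if (op->evaluate(outputs, inputs)) {
        for (float v : out)
            std::cout << v << ' ';  // 600 / -600 should saturate to the f8e4m3 limits
        std::cout << '\n';
    }
}
```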
+}; + + +namespace detail { + +attach_fake_convert_impl::attach_fake_convert_impl() { + auto formats = { + format::bfyx, + format::bfzyx, + format::bfwzyx, + format::bfuwzyx, + format::bfvuwzyx, + }; + + auto types = { + data_types::f32, + data_types::f16, + data_types::bf16 + }; + + implementation_map::add(impl_types::cpu, shape_types::static_shape, fake_convert_impl::create, types, formats); + implementation_map::add(impl_types::cpu, shape_types::dynamic_shape, fake_convert_impl::create, types, formats); +} + +} // namespace detail +} // namespace cpu +} // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::fake_convert_impl) +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::fake_convert) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp index 2b0dc5b212158c..e86628444de439 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp @@ -31,6 +31,7 @@ void register_implementations() { REGISTER_CPU(tile); REGISTER_CPU(select); REGISTER_CPU(reduce); + REGISTER_CPU(fake_convert); } } // namespace cpu diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp index cb89eae29d8c56..15cc4b11c077eb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp @@ -56,6 +56,7 @@ REGISTER_CPU(broadcast); REGISTER_CPU(tile); REGISTER_CPU(select); REGISTER_CPU(reduce); +REGISTER_CPU(fake_convert); #undef REGISTER_CPU diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 42d83a0265d290..7d54129195ccc6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -121,6 +121,46 @@ bool query_local_block_io_supported(engine& e, const ExecutionConfig& config) { namespace cldnn { +bool check_cm_jit_support(cldnn::engine& e, const cldnn::ExecutionConfig& config) { + auto device = e.get_device().get(); + + static std::mutex m; + std::lock_guard lock(m); + + static std::map cache; + if (cache.find(device) != cache.end()) { + return cache.at(device); + } + + std::shared_ptr kernel_string = std::make_shared(); + // This program checks if cm sources can be jitted by current IGC version + const char* kernel_code = R""""( + #include + #include + + extern "C" _GENX_MAIN_ void cm_check() { + unsigned int id = cm_linear_global_id(); + } + )""""; + + kernel_string->str = kernel_code; + kernel_string->options = " -cmc "; + kernel_string->entry_point = "cm_check"; + kernel_string->batch_compilation = true; + + try { + cldnn::kernel_impl_params dummy_params; + auto _kernels_cache_device_query = std::unique_ptr(new cldnn::kernels_cache(e, config, 0)); + _kernels_cache_device_query->add_kernels_source(dummy_params, {kernel_string}, false); + _kernels_cache_device_query->build_all(); + cache[device] = true; + } catch (std::exception&) { + cache[device] = false; + } + + return cache.at(device); +} + bool query_microkernels_supported(cldnn::engine& e, const cldnn::ExecutionConfig& config) { auto device = e.get_device().get(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h index a8c715af98f198..bf8968fd4b255b 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h @@ -306,6 +306,7 @@ inline void update_shapes(kernel_selector::Params& p, const kernel_impl_params& } } +bool check_cm_jit_support(cldnn::engine& e, const cldnn::ExecutionConfig& config); bool query_microkernels_supported(cldnn::engine& e, const cldnn::ExecutionConfig& config); } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 5db452dcda26f0..b122195c8e1265 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -153,8 +153,12 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::string entry_point = kernel_string->entry_point; std::string options = kernel_string->options; bool batch_compilation = kernel_string->batch_compilation; + bool is_cm = kernel_string->language == kernel_language::CM; - if (batch_compilation) { + auto& headers = is_cm ? cm_batch_headers : batch_headers; + + // Order matters for cm options + if (batch_compilation && !is_cm) { options = reorder_options(options); } @@ -174,7 +178,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, const auto& batch_id = 0; // increase bucket id if and only if new bucket comes bucket_id = static_cast(program_buckets.size() - 1); - current_bucket.push_back(batch_program(bucket_id, batch_id, options, batch_headers)); + current_bucket.push_back(batch_program(bucket_id, batch_id, options, headers, is_cm)); } // This is a temporary walk-around to avoid severe performance drop. @@ -205,7 +209,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, || current_bucket.back().entry_point_to_id.find(entry_point) != current_bucket.back().entry_point_to_id.end() || need_separate_batch(entry_point)) { const auto& batch_id = static_cast(current_bucket.size()); - current_bucket.push_back(batch_program(bucket_id, batch_id, options, batch_headers)); + current_bucket.push_back(batch_program(bucket_id, batch_id, options, headers, is_cm)); } auto& current_batch = current_bucket.back(); @@ -270,12 +274,14 @@ kernels_cache::kernels_cache(engine& engine, const ExecutionConfig& config, uint32_t prog_id, std::shared_ptr task_executor, - const std::map& batch_headers) + const std::map& batch_headers, + const std::map& cm_batch_headers) : _device(get_target_device(engine)) , _task_executor(task_executor) , _config(config) , _prog_id(prog_id) - , batch_headers(std::move(batch_headers)) { } + , batch_headers(std::move(batch_headers)) + , cm_batch_headers(std::move(cm_batch_headers)) { } static std::vector getProgramBinaries(cl::Program program) { // Get the size of the program binary in bytes. 
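check_cm_jit_support() introduced above memoizes one trial compilation per device behind a mutex, so the probe runs at most once per device per process. The pattern in isolation, with probe_device() as a toy stand-in for the real CM JIT probe:

```cpp
#include <map>
#include <mutex>
#include <stdexcept>

// Toy stand-in for the real probe, which trial-compiles a tiny CM kernel
// and throws if the current compiler rejects CM sources.
bool probe_device(int device_id) {
    if (device_id < 0)
        throw std::runtime_error("no CM compiler for this device");
    return true;
}

bool is_cm_supported(int device_id) {
    static std::mutex m;
    static std::map<int, bool> cache;

    std::lock_guard<std::mutex> lock(m);
    auto it = cache.find(device_id);
    if (it != cache.end())
        return it->second;  // probe already ran for this device

    bool supported = false;
    try {
        supported = probe_device(device_id);
    } catch (const std::exception&) {
        supported = false;  // compilation failure means CM is unavailable
    }
    return cache[device_id] = supported;
}

int main() {
    return is_cm_supported(0) ? 0 : 1;
}
```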
diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp index b08b087c55854a..1bb0ffbd2066bb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp @@ -58,7 +58,11 @@ class kernels_cache { bool has_microkernels; std::map> entry_point_to_id; - explicit batch_program(int32_t _bucket_id, int32_t _batch_id, std::string _options, const std::map& batch_headers) + explicit batch_program(int32_t _bucket_id, + int32_t _batch_id, + std::string _options, + const std::map& batch_headers, + bool is_cm = false) : bucket_id(_bucket_id), batch_id(_batch_id), hash_value(0), @@ -68,17 +72,22 @@ class kernels_cache { dump_custom_program(false), has_microkernels(false), entry_point_to_id({}) { - static const std::vector micro_kernel_include_names { - "generic_vector_ops", - "tile_ops", - "sdpa_utils" - }; - for (const auto& kv : batch_headers) { - if (std::find(micro_kernel_include_names.begin(), micro_kernel_include_names.end(), kv.first) == micro_kernel_include_names.end()) { - source.push_back(kv.second); - } else { - micro_headers.push_back(kv.second); + if (!is_cm) { + static const std::vector micro_kernel_include_names { + "generic_vector_ops", + "tile_ops", + "sdpa_utils" + }; + for (const auto& kv : batch_headers) { + if (std::find(micro_kernel_include_names.begin(), micro_kernel_include_names.end(), kv.first) == micro_kernel_include_names.end()) { + source.push_back(kv.second); + } else { + micro_headers.push_back(kv.second); + } } + } else { + for (const auto& kv : batch_headers) + source.push_back(kv.second); } } }; @@ -97,6 +106,7 @@ class kernels_cache { std::map, uint32_t> _cached_binaries; std::unordered_map _cached_kernels; std::map batch_headers; + std::map cm_batch_headers; std::unordered_map _kernel_batch_hash; void get_program_source(const kernels_code& kernels_source_code, std::vector*) const; void build_batch(const batch_program& batch, compiled_kernels& compiled_kernels); @@ -112,7 +122,8 @@ class kernels_cache { const ExecutionConfig& config, uint32_t prog_id, std::shared_ptr task_executor = nullptr, - const std::map& batch_headers = {}); + const std::map& batch_headers = {}, + const std::map& cm_batch_headers = {}); kernel::ptr get_kernel_from_cached_kernels(std::string id) const; std::vector get_kernels(const kernel_impl_params& params) const; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp index 9cf1a252564934..2bc377f2c1459a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp @@ -63,6 +63,7 @@ struct paged_attention_impl : multi_stage_primitive { void load(BinaryInputBuffer& ib) override { parent::load(ib); + ib >> make_data(&has_scores_output, sizeof(bool)); if (is_dynamic()) { auto& kv_cache_update_kernel_selector = kv_cache_update_kernel_selector_t::Instance(); auto kv_cache_update_kernel_impl = kv_cache_update_kernel_selector.GetImplementation(_kernels_data[Stage::KV_CACHE_UPDATE].kernelName); @@ -78,7 +79,45 @@ struct paged_attention_impl : multi_stage_primitive { } } + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << make_data(&has_scores_output, sizeof(bool)); + } + std::vector get_internal_buffer_layouts_impl() const override { + /* + * Internal buffers allocation owners and users: + * 
+--------------------------------------+--------------------+--------------------+ + * | Stage | Allocates & uses | Reuses | + * +--------------------------------------+--------------------+--------------------+ + * | KV_CACHE_UPDATE | [0, 1, 2] | | + * +--------------------------------------+--------------------+--------------------+ + * | SDPA (1st token) | | [0, 1, 2] | + * +--------------------------------------+--------------------+--------------------+ + * | PA_SDPA (2nd+ token) | [5, 6, 7] | | + * +--------------------------------------+--------------------+--------------------+ + * | PA_SDPA (mixed mode) | [5, 6, 7, 8] | | + * +--------------------------------------+--------------------+--------------------+ + * | SDPA (1st token) + scores output | | [0, 1, 2, 3, 4] | + * +--------------------------------------+--------------------+--------------------+ + * | PA_SDPA (2nd+ token) + scores output | [3, 4, 5, 6, 7] | | + * +--------------------------------------+--------------------+--------------------+ + * | PA_SDPA (mixed mode) + scores output | [3, 4, 5, 6, 7, 8] | | + * +--------------------------------------+--------------------+--------------------+ + * + * Description: + * 0, 1, 2 - Buffers used for proper blocks distribution for kv_cache_update and + * sdpa_opt (1st token calculation) block configuration over target_seq_len dimension. + * Filled in paged_attention_inst::on_execute() call. + * 3, 4 - Optional buffers used for PA scores output calculation, storing intermediate + * softmax values by partitions (filled in PA/SDPA kernels) and sequence length offsets + * for each subsequence (filled in paged_attention_inst::on_execute() call). + * 5, 6, 7 - Used for 2nd+ PA calculation (for softmax exp_sums, max_logits, and intermediate output). + * Filled in PA/SDPA kernels. + * 8 - Optional buffer used for mixed PA execution mode, mapping gws idx to subsequence id. + * Filled in paged_attention_inst::on_execute() call. + */ + auto add_internal_buffers = [](std::vector& layouts, const kernel_selector::KernelData& kd) { if (kd.internalBufferSizes.empty()) return; @@ -133,6 +172,7 @@ struct paged_attention_impl : multi_stage_primitive { args.outputs = { instance.output_memory_ptr(0) }; } else if (stage == Stage::PA_SDPA) { if (kernel_idx == 0 || kernel_idx == 1) { + // 2nd+ token calculation or mixed stage tokens calculation args.shape_info = instance.shape_info_memory_ptr(); args.inputs = { instance.input_memory_ptr(0), @@ -155,7 +195,8 @@ struct paged_attention_impl : multi_stage_primitive { if (desc->has_alibi) { args.inputs.push_back(instance.alibi_memory_ptr()); } - } else { + } else if (kernel_idx == 2 || kernel_idx == 3) { + // Finalization kernel or mixed stage finalization kernel args.inputs = { instance.past_lens_memory_ptr() }; if (is_mixed_mode) { @@ -163,17 +204,31 @@ struct paged_attention_impl : multi_stage_primitive { // dependency args.inputs.push_back(instance.subsequence_begins_memory_ptr()); } + } else if (kernel_idx == 4) { + // Output scores calculation kernel + args.inputs = { instance.past_lens_memory_ptr(), + instance.subsequence_begins_memory_ptr() }; } args.outputs = { instance.output_memory_ptr(0) }; + + if (kernel_idx == 4) { + args.outputs.push_back(instance.output_memory_ptr(1)); + } } return args; } std::set get_lockable_internal_buffers() const override { - return std::set{ 0, 1, 2, /* SDPA and KV_CACHE_UPDATE indexes configuration */ - 6, /* PA_SDPA multiple tokens mode */ }; + size_t mixed_mode_buffer = has_scores_output ? 
8 : 6; + + std::set lockable_ids = { 0, 1, 2, /* SDPA and KV_CACHE_UPDATE indexes configuration */ + mixed_mode_buffer /* PA_SDPA multiple tokens mode */ }; + if (has_scores_output) + lockable_ids.insert(4 /* Precalculated accumulated sequence length offsets for each subsequence */); + + return lockable_ids; }; void execute_stage(const std::vector& events, @@ -194,8 +249,17 @@ struct paged_attention_impl : multi_stage_primitive { if (stage == Stage::PA_SDPA) { internal_buffers_offset = _kernels_data[Stage::KV_CACHE_UPDATE].internalBufferSizes.size(); internal_buffers_count = _kernels_data[Stage::PA_SDPA].internalBufferSizes.size(); - } else { + } else if (stage == Stage::KV_CACHE_UPDATE) { + internal_buffers_count = _kernels_data[Stage::KV_CACHE_UPDATE].internalBufferSizes.size(); + } else if (stage == Stage::SDPA) { internal_buffers_count = _kernels_data[Stage::KV_CACHE_UPDATE].internalBufferSizes.size(); + + const auto desc = instance.get_node().as().get_primitive(); + if (desc->has_scores_output()) { + // Add intermediate buffers for PagedAttention scores calculation: + // softmax_results, subsequence_offsets, exp_sums, max_logits, tmp_out + internal_buffers_count += 5; + } } for (size_t kd_idx = 0; kd_idx < _kernels_data[stage].kernels.size(); ++kd_idx) { @@ -216,6 +280,23 @@ struct paged_attention_impl : multi_stage_primitive { intermediate_memories.begin() + internal_buffers_offset, intermediate_memories.begin() + internal_buffers_offset + internal_buffers_count); + GPU_DEBUG_TRACE_DETAIL << "Execute stage=" << stage << " kernel=" << kd_idx << " " << _kernels_data[stage].kernelName << " start_offset=" + << internal_buffers_offset << " count=" << internal_buffers_count << "\n"; + + GPU_DEBUG_TRACE_DETAIL << "Configured kernel arguments:\n"; + for (size_t i = 0; i < _kernels_data[stage].kernels[kd_idx].params.arguments.size(); i++) { + GPU_DEBUG_TRACE_DETAIL << "\t" << i << ": type=" << static_cast(_kernels_data[stage].kernels[kd_idx].params.arguments[i].t) << " " + << "index=" << _kernels_data[stage].kernels[kd_idx].params.arguments[i].index << "\n"; + } + + GPU_DEBUG_TRACE_DETAIL << "Memory buffers:" + << "shape_info=" << args.shape_info << " " + << "inputs=" << args.inputs.size() << " " + << "outputs=" << args.outputs.size() << " " + << "intermediates=" << args.intermediates.size() << " " + << "weights=" << args.weights << " " + << "scalars=" << (args.scalars ? 
args.scalars->size() : 0) << "\n"; + stream.set_arguments(*_kernels[idx_final], _kernels_data[stage].kernels[kd_idx].params, args); const auto& gws = params.workGroups.global; @@ -242,10 +323,13 @@ struct paged_attention_impl : multi_stage_primitive { execute_stage(events, instance, res_events, Stage::KV_CACHE_UPDATE, is_mixed_mode); - std::vector dep_events(res_events.begin(), res_events.end()); if (stage == PagedAttentionStage::PREFILL) { + std::vector dep_events(res_events.begin(), res_events.end()); execute_stage(dep_events, instance, res_events, Stage::SDPA, is_mixed_mode); - } else if (stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED) { + } + + if (stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED || has_scores_output) { + std::vector dep_events(res_events.begin(), res_events.end()); execute_stage(dep_events, instance, res_events, Stage::PA_SDPA, is_mixed_mode); } @@ -338,7 +422,7 @@ struct paged_attention_impl : multi_stage_primitive { return aligned_seq_len; } - static kernel_selector::sdpa_configuration get_sdpa_configuration(const kernel_impl_params& impl_param) { + static kernel_selector::sdpa_configuration get_sdpa_configuration(const kernel_impl_params& impl_param, bool is_dynamic = true) { kernel_selector::sdpa_configuration config; const auto desc = impl_param.typed_desc(); @@ -362,37 +446,45 @@ struct paged_attention_impl : multi_stage_primitive { config.group_size = desc->heads_num / desc->kv_heads_num; } + if (desc->has_scores_output() && !is_dynamic) { + const auto& input_mem = impl_param.memory_deps; + const auto max_context_len = input_mem.at(12); + mem_lock max_context_len_mem_lock(max_context_len, *impl_param.strm); + config.paged_attention_max_len = max_context_len_mem_lock[0]; + } + return config; } static kv_cache_update_kernel_params_t get_kv_cache_update_kernel_params(const kernel_impl_params& impl_param, const PagedAttentionStage& stage, + const kernel_selector::MultiDataTensor& input_tensors, bool is_dynamic = false) { auto params = get_default_params(impl_param, is_dynamic); - const auto& key_layout = impl_param.get_input_layout(1); - const auto& value_layout = impl_param.get_input_layout(2); - const auto& key_cache_layout = impl_param.get_input_layout(3); - const auto& value_cache_layout = impl_param.get_input_layout(4); - const auto& past_lens_layout = impl_param.get_input_layout(5); - const auto& block_indices_layout = impl_param.get_input_layout(7); - const auto& block_indices_begins_layout = impl_param.get_input_layout(8); - const auto& subsequence_begins_layout = impl_param.get_input_layout(6); + const auto& key_tensor = input_tensors[1]; + const auto& value_tensor = input_tensors[2]; + const auto& key_cache_tensor = input_tensors[3]; + const auto& value_cache_tensor = input_tensors[4]; + const auto& past_lens_tensor = input_tensors[5]; + const auto& block_indices_tensor = input_tensors[7]; + const auto& block_indices_begins_tensor = input_tensors[8]; + const auto& subsequence_begins_tensor = input_tensors[6]; const auto inputs_number = 6; const auto outputs_number = 2; params.inputs.resize(inputs_number); params.outputs.resize(outputs_number); - params.inputs[0] = convert_data_tensor(key_layout); - params.inputs[1] = convert_data_tensor(value_layout); - params.inputs[2] = convert_data_tensor(past_lens_layout); - params.inputs[3] = convert_data_tensor(block_indices_layout); - params.inputs[4] = convert_data_tensor(block_indices_begins_layout); - params.inputs[5] = 
convert_data_tensor(subsequence_begins_layout); - params.outputs[0] = convert_data_tensor(key_cache_layout); - params.outputs[1] = convert_data_tensor(value_cache_layout); + params.inputs[0] = key_tensor; + params.inputs[1] = value_tensor; + params.inputs[2] = past_lens_tensor; + params.inputs[3] = block_indices_tensor; + params.inputs[4] = block_indices_begins_tensor; + params.inputs[5] = subsequence_begins_tensor; + params.outputs[0] = key_cache_tensor; + params.outputs[1] = value_cache_tensor; - params.conf = get_sdpa_configuration(impl_param); + params.conf = get_sdpa_configuration(impl_param, is_dynamic); params.is_prefill = stage == PagedAttentionStage::PREFILL || stage == PagedAttentionStage::MIXED; @@ -418,18 +510,23 @@ struct paged_attention_impl : multi_stage_primitive { return params; } - static sdpa_kernel_params_t get_sdpa_kernel_params(const kernel_impl_params& impl_param, const PagedAttentionStage& stage, bool is_dynamic = false) { + static sdpa_kernel_params_t get_sdpa_kernel_params(const kernel_impl_params& impl_param, + const PagedAttentionStage& stage, + const kernel_selector::MultiDataTensor& input_tensors, + bool is_dynamic = false) { const auto desc = impl_param.typed_desc(); auto params = get_default_params(impl_param, is_dynamic); - const auto& query_layout = impl_param.get_input_layout(0); - const auto& key_layout = impl_param.get_input_layout(1); - const auto& value_layout = impl_param.get_input_layout(2); - const auto& subsequence_begins_layout = impl_param.get_input_layout(6); - const auto& scale_layout = impl_param.get_input_layout(9); - const auto& alibi_layout = impl_param.get_input_layout(11); - const auto has_alibi = alibi_layout.count() > 0; + const auto& query_tensor = input_tensors[0]; + const auto& key_tensor = input_tensors[1]; + const auto& value_tensor = input_tensors[2]; + const auto& subsequence_begins_tensor = input_tensors[6]; + const auto& scale_tensor = input_tensors[9]; + const auto& alibi_tensor = input_tensors[11]; + + const auto has_alibi = impl_param.get_input_layout(11).count() > 0; const auto has_scale_input = !desc->scale_val.has_value(); + const auto has_scores_output = desc->has_scores_output(); auto inputs_number = 4; if (has_scale_input) @@ -440,18 +537,23 @@ struct paged_attention_impl : multi_stage_primitive { auto input_idx = 0; params.inputs.resize(inputs_number); - params.inputs[input_idx++] = convert_data_tensor(query_layout); - params.inputs[input_idx++] = convert_data_tensor(key_layout); - params.inputs[input_idx++] = convert_data_tensor(value_layout); - params.inputs[input_idx++] = convert_data_tensor(subsequence_begins_layout); + params.inputs[input_idx++] = query_tensor; + params.inputs[input_idx++] = key_tensor; + params.inputs[input_idx++] = value_tensor; + params.inputs[input_idx++] = subsequence_begins_tensor; if (has_scale_input) - params.inputs[input_idx++] = convert_data_tensor(scale_layout); + params.inputs[input_idx++] = scale_tensor; if (has_alibi) - params.inputs[input_idx++] = convert_data_tensor(alibi_layout); + params.inputs[input_idx++] = alibi_tensor; - params.conf = get_sdpa_configuration(impl_param); + if (has_scores_output) { + params.outputs.resize(2); + params.outputs[1] = convert_data_tensor(impl_param.get_output_layout(1)); + } + + params.conf = get_sdpa_configuration(impl_param, is_dynamic); const auto& in_offsets_map = impl_param.in_port_to_shape_info_offset; const auto& out_offsets_map = impl_param.out_port_to_shape_info_offset; @@ -475,26 +577,34 @@ struct paged_attention_impl : 
multi_stage_primitive { if ((stage == PagedAttentionStage::PREFILL || stage == PagedAttentionStage::MIXED) && !is_dynamic) params.conf.paged_attention_aligned_seq_len = get_aligned_seq_len(impl_param, stage); + if (has_scores_output) + out_tensor_to_offset_map.insert({1, out_offsets_map.at(1)}); + params.set_dynamic_shape_offsets(in_tensor_to_offset_map, out_tensor_to_offset_map); return params; } - static pa_sdpa_kernel_params_t get_pa_sdpa_params(const kernel_impl_params& impl_param, const PagedAttentionStage& stage, bool is_dynamic = false) { + static pa_sdpa_kernel_params_t get_pa_sdpa_params(const kernel_impl_params& impl_param, + const PagedAttentionStage& stage, + const kernel_selector::MultiDataTensor& input_tensors, + bool is_dynamic = false) { const auto desc = impl_param.typed_desc(); auto params = get_default_params(impl_param, is_dynamic); - const auto& query_layout = impl_param.get_input_layout(0); - const auto& key_cache_layout = impl_param.get_input_layout(3); - const auto& value_cache_layout = impl_param.get_input_layout(4); - const auto& past_lens_layout = impl_param.get_input_layout(5); - const auto& block_indices_layout = impl_param.get_input_layout(7); - const auto& block_indices_begins_layout = impl_param.get_input_layout(8); - const auto& subsequence_begins_layout = impl_param.get_input_layout(6); - const auto& scale_layout = impl_param.get_input_layout(9); - const auto& alibi_layout = impl_param.get_input_layout(11); - const auto has_alibi = alibi_layout.count() > 0; + const auto& query_tensor = input_tensors[0]; + const auto& key_cache_tensor = input_tensors[3]; + const auto& value_cache_tensor = input_tensors[4]; + const auto& past_lens_tensor = input_tensors[5]; + const auto& block_indices_tensor = input_tensors[7]; + const auto& block_indices_begins_tensor = input_tensors[8]; + const auto& subsequence_begins_tensor = input_tensors[6]; + const auto& scale_tensor = input_tensors[9]; + const auto& alibi_tensor = input_tensors[11]; + + const auto has_alibi = impl_param.get_input_layout(11).count() > 0; const auto has_scale_input = !desc->scale_val.has_value(); + const auto has_scores_output = desc->has_scores_output(); auto inputs_number = 7; if (has_scale_input) @@ -505,28 +615,34 @@ struct paged_attention_impl : multi_stage_primitive { auto input_idx = 0; params.inputs.resize(inputs_number); - params.inputs[input_idx++] = convert_data_tensor(query_layout); - params.inputs[input_idx++] = convert_data_tensor(key_cache_layout); - params.inputs[input_idx++] = convert_data_tensor(value_cache_layout); - params.inputs[input_idx++] = convert_data_tensor(past_lens_layout); - params.inputs[input_idx++] = convert_data_tensor(block_indices_layout); - params.inputs[input_idx++] = convert_data_tensor(block_indices_begins_layout); - params.inputs[input_idx++] = convert_data_tensor(subsequence_begins_layout); - params.conf = get_sdpa_configuration(impl_param); + params.inputs[input_idx++] = query_tensor; + params.inputs[input_idx++] = key_cache_tensor; + params.inputs[input_idx++] = value_cache_tensor; + params.inputs[input_idx++] = past_lens_tensor; + params.inputs[input_idx++] = block_indices_tensor; + params.inputs[input_idx++] = block_indices_begins_tensor; + params.inputs[input_idx++] = subsequence_begins_tensor; + + params.conf = get_sdpa_configuration(impl_param, is_dynamic); if (has_scale_input) - params.inputs[input_idx++] = convert_data_tensor(scale_layout); + params.inputs[input_idx++] = scale_tensor; if (has_alibi) - params.inputs[input_idx++] = 
convert_data_tensor(alibi_layout); + params.inputs[input_idx++] = alibi_tensor; - params.multi_tokens_mode = stage == PagedAttentionStage::MIXED; + if (has_scores_output) { + params.outputs.resize(2); + params.outputs[1] = convert_data_tensor(impl_param.get_output_layout(1)); + } - if ((stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED) && !is_dynamic) { + params.stage = stage; + + if (!has_scores_output && !is_dynamic) { const auto& input_mem = impl_param.memory_deps; const auto max_context_len = input_mem.at(12); mem_lock max_context_len_mem_lock(max_context_len, *impl_param.strm); - params.max_context_len = max_context_len_mem_lock[0]; + params.conf.paged_attention_max_len = max_context_len_mem_lock[0]; } const auto& in_offsets_map = impl_param.in_port_to_shape_info_offset; @@ -552,6 +668,9 @@ struct paged_attention_impl : multi_stage_primitive { if (has_alibi) in_tensor_to_offset_map.insert({input_idx++, in_offsets_map.at(11)}); + if (has_scores_output) + out_tensor_to_offset_map.insert({1, out_offsets_map.at(1)}); + params.set_dynamic_shape_offsets(in_tensor_to_offset_map, out_tensor_to_offset_map); return params; @@ -560,14 +679,20 @@ struct paged_attention_impl : multi_stage_primitive { void update_dispatch_data(const kernel_impl_params& impl_param) override { const auto stage = get_paged_attention_stage(impl_param); - auto kv_cache_update_kernel_params = get_kv_cache_update_kernel_params(impl_param, stage, impl_param.is_dynamic()); + kernel_selector::MultiDataTensor input_tensors; + for (const auto& input_layout : impl_param.input_layouts) + input_tensors.emplace_back(convert_data_tensor(input_layout)); + + auto kv_cache_update_kernel_params = get_kv_cache_update_kernel_params(impl_param, stage, input_tensors, impl_param.is_dynamic()); (_kernels_data[Stage::KV_CACHE_UPDATE].update_dispatch_data_func)(kv_cache_update_kernel_params, _kernels_data[Stage::KV_CACHE_UPDATE]); if (stage == PagedAttentionStage::PREFILL) { - auto sdpa_kernel_params = get_sdpa_kernel_params(impl_param, stage, impl_param.is_dynamic()); + auto sdpa_kernel_params = get_sdpa_kernel_params(impl_param, stage, input_tensors, impl_param.is_dynamic()); (_kernels_data[Stage::SDPA].update_dispatch_data_func)(sdpa_kernel_params, _kernels_data[Stage::SDPA]); - } else if (stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED) { - auto pa_sdpa_kernel_params = get_pa_sdpa_params(impl_param, stage, impl_param.is_dynamic()); + } + + if (stage == PagedAttentionStage::GENERATE || stage == PagedAttentionStage::MIXED || has_scores_output) { + auto pa_sdpa_kernel_params = get_pa_sdpa_params(impl_param, stage, input_tensors, impl_param.is_dynamic()); (_kernels_data[Stage::PA_SDPA].update_dispatch_data_func)(pa_sdpa_kernel_params, _kernels_data[Stage::PA_SDPA]); } } @@ -576,20 +701,32 @@ struct paged_attention_impl : multi_stage_primitive { std::vector kernels_data; const auto stage = PagedAttentionStage::UNKNOWN; - auto kv_cache_update_kernel_params = get_kv_cache_update_kernel_params(impl_param, stage, impl_param.is_dynamic()); + kernel_selector::MultiDataTensor input_tensors; + for (const auto& input_layout : impl_param.input_layouts) + input_tensors.emplace_back(convert_data_tensor(input_layout)); + + auto kv_cache_update_kernel_params = get_kv_cache_update_kernel_params(impl_param, stage, input_tensors, impl_param.is_dynamic()); auto& kv_cache_update_kernel_selector = kv_cache_update_kernel_selector_t::Instance(); 
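+        // Note (assumption, inferred from the surrounding code): kernels for all three
+        // stages are selected up front here with shape-agnostic parameters
+        // (stage == UNKNOWN); update_dispatch_data() above then decides per execution
+        // which stages actually run, based on the detected PagedAttentionStage and on
+        // has_scores_output.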
kernels_data.push_back(kv_cache_update_kernel_selector.get_best_kernel(kv_cache_update_kernel_params)); - auto sdpa_kernel_params = get_sdpa_kernel_params(impl_param, stage, impl_param.is_dynamic()); + auto sdpa_kernel_params = get_sdpa_kernel_params(impl_param, stage, input_tensors, impl_param.is_dynamic()); auto& sdpa_kernel_selector = sdpa_kernel_selector_t::Instance(); kernels_data.push_back(sdpa_kernel_selector.get_best_kernel(sdpa_kernel_params)); - auto pa_sdpa_kernel_params = get_pa_sdpa_params(impl_param, stage, impl_param.is_dynamic()); + auto pa_sdpa_kernel_params = get_pa_sdpa_params(impl_param, stage, input_tensors, impl_param.is_dynamic()); auto& pa_sdpa_kernel_selector = pa_sdpa_kernel_selector_t::Instance(); kernels_data.push_back(pa_sdpa_kernel_selector.get_best_kernel(pa_sdpa_kernel_params)); - return cldnn::make_unique(kernels_data); + auto impl = cldnn::make_unique(kernels_data); + + const auto& desc = impl_param.typed_desc(); + impl->has_scores_output = desc->has_scores_output(); + + return impl; } + +private: + bool has_scores_output = false; }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp new file mode 100644 index 00000000000000..991ab5aa12657a --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp @@ -0,0 +1,24 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "registry.hpp" +#include "intel_gpu/primitives/fake_convert.hpp" +#include "primitive_inst.h" + +namespace ov { +namespace intel_gpu { + +using namespace cldnn; + +const std::vector>& Registry::get_implementations() { + static const std::vector> impls = { + OV_GPU_GET_INSTANCE_CPU(fake_convert, shape_types::static_shape) + OV_GPU_GET_INSTANCE_CPU(fake_convert, shape_types::dynamic_shape) + }; + + return impls; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/fully_connected_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/fully_connected_impls.cpp index 6f725150794fb6..6ea9eb33c7421c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/fully_connected_impls.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/fully_connected_impls.cpp @@ -11,6 +11,10 @@ #include "impls/onednn/fully_connected_onednn.hpp" #endif +#if OV_GPU_WITH_CM + #include "impls/cm/impl_example.hpp" +#endif + namespace ov { namespace intel_gpu { @@ -26,6 +30,7 @@ const std::vector>& Registry +#include + +namespace cldnn { + +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + typed_program_node(const std::shared_ptr prim, program& prog) + : parent(prim, prog), destination_type(prim->destination_type) { + support_padding_all(true); + } + +public: + using parent::parent; + + program_node& input() const { return get_dependency(0); } + program_node& scale() const { return get_dependency(1); } + program_node& shift() const { return get_dependency(2); } + bool has_shift() const { return (get_dependencies().size() == 3); } + + ov::element::Type get_destination_type() const { return destination_type; } + + std::vector get_shape_infer_dependencies() const override { return {}; } + +private: + ov::element::Type destination_type; +}; + +using fake_convert_node = typed_program_node; + +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = 
typed_primitive_inst_base; + using parent::parent; + +public: + template + static std::vector calc_output_layouts(fake_convert_node const& /*node*/, const kernel_impl_params& impl_param); + static layout calc_output_layout(fake_convert_node const& node, kernel_impl_params const& impl_param); + static std::string to_string(fake_convert_node const& node); + + typed_primitive_inst(network& network, fake_convert_node const& node); +}; + +using fake_convert_inst = typed_primitive_inst; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h b/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h index a7918ba9c3719c..675d77296aa06b 100644 --- a/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h @@ -7,14 +7,11 @@ #include "intel_gpu/primitives/paged_attention.hpp" #include "primitive_inst.h" +#include "sdpa/pa_sdpa_kernel_opt.h" + namespace cldnn { -enum PagedAttentionStage { - GENERATE = 0, - PREFILL = 1, - MIXED = 2, - UNKNOWN = 3 -}; +using PagedAttentionStage = kernel_selector::PagedAttentionStage; PagedAttentionStage get_paged_attention_stage(const kernel_impl_params& impl_param); @@ -61,6 +58,9 @@ class typed_primitive_inst : public typed_primitive_inst_base

prefill_network; diff --git a/src/plugins/intel_gpu/src/graph/paged_attention.cpp b/src/plugins/intel_gpu/src/graph/paged_attention.cpp index 787fd184f75b6a..c761aaf63799cd 100644 --- a/src/plugins/intel_gpu/src/graph/paged_attention.cpp +++ b/src/plugins/intel_gpu/src/graph/paged_attention.cpp @@ -48,14 +48,38 @@ layout paged_attention_inst::calc_output_layout(const paged_attention_node& /*no template std::vector paged_attention_inst::calc_output_layouts(paged_attention_node const& /*node*/, kernel_impl_params const& impl_param) { - auto out_layout = impl_param.get_input_layout(0); + auto data_layout = impl_param.get_input_layout(0); const auto& key_cache_ps = impl_param.get_input_layout(3).get_partial_shape(); bool valid_block_size = key_cache_ps[3].is_dynamic() || key_cache_ps[3].get_length() == paged_attention::block_size; OPENVINO_ASSERT(valid_block_size, "[GPU] Incorrect block size for Paged Attention operation. " "Expected ", paged_attention::block_size, ", but got ", key_cache_ps[3].get_length()); - return {out_layout}; + std::vector output_layouts{ data_layout }; + + const auto& desc = impl_param.typed_desc(); + if (desc->has_scores_output()) { + const auto past_lens_idx = 5; + const auto output_dt = data_layout.data_type; + if (impl_param.get_input_layout(past_lens_idx).is_static()) { + const auto& memory_deps = impl_param.memory_deps; + const auto past_lens_mem = memory_deps.at(past_lens_idx); + mem_lock past_lens_mem_lock(past_lens_mem, *impl_param.strm); + + long int total_size = 0; + for (size_t i = 0; i < past_lens_mem_lock.size(); i++) { + total_size += past_lens_mem_lock[i]; + } + + total_size += static_cast(impl_param.get_input_layout(0).get_shape()[0]); + + output_layouts.push_back(layout{ov::PartialShape{total_size}, output_dt, format::bfyx}); + } else { + output_layouts.push_back(layout{ov::PartialShape::dynamic(1), output_dt, format::bfyx}); + } + } + + return output_layouts; } template std::vector @@ -81,45 +105,79 @@ std::string paged_attention_inst::to_string(const paged_attention_node& node) { } void paged_attention_inst::on_execute() { - auto stage = get_paged_attention_stage(*_impl_params); + const auto& desc = _impl_params->typed_desc(); + const bool has_scores_output = desc->has_scores_output(); + const auto stage = get_paged_attention_stage(*_impl_params); - if (stage == PagedAttentionStage::UNKNOWN || - stage == PagedAttentionStage::GENERATE) + if ((stage == PagedAttentionStage::UNKNOWN) || + (stage == PagedAttentionStage::GENERATE && !has_scores_output)) return; + auto& stream = get_network().get_stream(); + const auto past_lens_mem = past_lens_memory_ptr(); + const auto subsequence_begins_mem = subsequence_begins_memory_ptr(); + mem_lock past_lens_mem_lock(past_lens_mem, stream); + mem_lock subsequence_begins_mem_lock(subsequence_begins_mem, stream); + std::unique_ptr> subsequence_offsets_lock = nullptr; + + if (has_scores_output) { + const size_t subsequence_offsets_idx = 4; + + OPENVINO_ASSERT(_intermediates_memory.size() > subsequence_offsets_idx, + "[GPU] Unexpected number of intermediates buffers for Paged Attention for scores output calculation"); + + auto subsequence_offsets_mem = _intermediates_memory[subsequence_offsets_idx]; + subsequence_offsets_lock.reset(new mem_lock(subsequence_offsets_mem, stream)); + } + + if (stage == PagedAttentionStage::GENERATE) { + // For the generate stage it's not necessary to configure any other intermediate + // buffers. 
Simply calculate the offsets and exit + size_t subsequence_offsets_acc = 0; + for (size_t i = 0; i < subsequence_begins_mem_lock.size() - 1; i++) { + const auto past_len = past_lens_mem_lock[i]; + const auto seq_start = subsequence_begins_mem_lock[i]; + const auto seq_end = subsequence_begins_mem_lock[i + 1]; + const auto seq_length = seq_end - seq_start; + + if (subsequence_offsets_lock) { + subsequence_offsets_lock->operator[](i) = static_cast(subsequence_offsets_acc); + subsequence_offsets_acc += seq_length + past_len; + } + } + + return; + } + OPENVINO_ASSERT(_intermediates_memory.size() >= 3, "Unexpected number of intermediates buffers for Paged Attention at prefill stage"); const auto blocks_indexes_start_idx = 0; const auto blocks_indexes_end_idx = 1; const auto blocked_gws_subseq_mapping_idx = 2; - const auto past_lens_mem = past_lens_memory_ptr(); - auto subsequence_begins_mem = subsequence_begins_memory_ptr(); auto blocks_indexes_start_mem = _intermediates_memory[blocks_indexes_start_idx]; auto blocks_indexes_end_mem = _intermediates_memory[blocks_indexes_end_idx]; auto blocked_gws_subseq_mapping_mem = _intermediates_memory[blocked_gws_subseq_mapping_idx]; OPENVINO_ASSERT(subsequence_begins_mem->get_layout().data_type == data_types::i32); - auto& stream = get_network().get_stream(); - mem_lock past_lens_mem_lock(past_lens_mem, stream); - mem_lock subsequence_begins_mem_lock(subsequence_begins_mem, stream); mem_lock blocks_indexes_start_lock(blocks_indexes_start_mem, stream); mem_lock blocks_indexes_end_lock(blocks_indexes_end_mem, stream); mem_lock blocked_gws_subseq_mapping_mem_lock(blocked_gws_subseq_mapping_mem, stream); std::unique_ptr> sequential_gws_subseq_mapping_lock = nullptr; if (stage == PagedAttentionStage::MIXED) { - const auto sequential_gws_subseq_mapping_idx = 6; + const size_t sequential_gws_subseq_mapping_idx = has_scores_output ? 
8 : 6; OPENVINO_ASSERT(_intermediates_memory.size() > sequential_gws_subseq_mapping_idx, - "Unexpected number of intermediates buffers for Paged Attention for mixed stage"); + "[GPU] Unexpected number of intermediates buffers for Paged Attention for mixed stage"); auto sequential_gws_subseq_mapping_mem = _intermediates_memory[sequential_gws_subseq_mapping_idx]; sequential_gws_subseq_mapping_lock.reset(new mem_lock(sequential_gws_subseq_mapping_mem, stream)); } size_t index = 0; + size_t subsequence_offsets_acc = 0; const auto target_seq_len_block_size = 16; // TODO: Get block size from the impl for (size_t i = 0; i < subsequence_begins_mem_lock.size() - 1; i++) { const auto past_len = past_lens_mem_lock[i]; @@ -159,6 +217,11 @@ void paged_attention_inst::on_execute() { sequential_gws_subseq_mapping_lock->operator[](idx) = static_cast(i); } } + + if (subsequence_offsets_lock) { + subsequence_offsets_lock->operator[](i) = static_cast(subsequence_offsets_acc); + subsequence_offsets_acc += seq_length + past_len; + } } } diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index bdffb9c4980722..c938be22b816ed 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -228,7 +228,8 @@ void program::init_program() { if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); _kernels_cache = std::unique_ptr(new kernels_cache(_engine, _config, prog_id, _task_executor, - kernel_selector::KernelBase::get_db().get_batch_headers())); + kernel_selector::KernelBase::get_db().get_batch_headers(), + kernel_selector::KernelBase::get_db().get_cm_batch_headers())); _kernels_cache->set_kernels_reuse(get_config().get_property(ov::intel_gpu::hint::enable_kernels_reuse)); @@ -1501,6 +1502,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.type() != cldnn::strided_slice::type_id() && prim.type() != cldnn::region_yolo::type_id() && prim.type() != cldnn::normalize::type_id() && + prim.type() != cldnn::group_normalization::type_id() && prim.type() != cldnn::mvn::type_id() && prim.type() != cldnn::gather::type_id() && prim.type() != cldnn::scatter_nd_update::type_id() && @@ -1581,6 +1583,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.type() != cldnn::deconvolution::type_id() && prim.type() != cldnn::multiclass_nms::type_id() && prim.type() != cldnn::normalize::type_id() && + prim.type() != cldnn::group_normalization::type_id() && prim.type() != cldnn::deconvolution::type_id() && prim.type() != cldnn::unique_count::type_id() && prim.type() != cldnn::unique_gather::type_id() && diff --git a/src/plugins/intel_gpu/src/kernel_selector/CMakeLists.txt b/src/plugins/intel_gpu/src/kernel_selector/CMakeLists.txt index 2b32423f9ce3a8..0c29c8afb9ff01 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/CMakeLists.txt +++ b/src/plugins/intel_gpu/src/kernel_selector/CMakeLists.txt @@ -11,11 +11,16 @@ file(GLOB_RECURSE LIBRARY_SRC "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp" "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" ) +list(FILTER LIBRARY_SRC EXCLUDE REGEX "${CMAKE_CURRENT_SOURCE_DIR}/cm_kernels/.*" ) file(GLOB_RECURSE KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/cl_kernels/*.cl" ) +file(GLOB_RECURSE CM_KERNELS + "${CMAKE_CURRENT_SOURCE_DIR}/cm_kernels/*" +) + # Path which points to root directory where code generated elements are created # (specific to build configuration). 
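+# Note (assumption, based on the rules in this file): the CM sources globbed into
+# CM_KERNELS are excluded from LIBRARY_SRC above on purpose; primitive_db_gen.py is
+# invoked below with the -cm flag so they are embedded into the separate
+# ks_cm_primitive_db.inc database rather than compiled as regular library sources.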
set(CODEGEN_DIR "${CMAKE_CURRENT_BINARY_DIR}/codegen") @@ -28,8 +33,12 @@ set(CODEGEN_INCDIR "${CODEGEN_DIR}/include") set(PRIM_DB "ks_primitive_db.inc") set(PRIM_DB_BATCH_HEADERS "ks_primitive_db_batch_headers.inc") +set(CM_PRIM_DB "ks_cm_primitive_db.inc") +set(CM_PRIM_DB_BATCH_HEADERS "ks_cm_primitive_db_batch_headers.inc") set(CODEGEN_CACHE_SOURCES "${CODEGEN_INCDIR}/${PRIM_DB}" - "${CODEGEN_INCDIR}/${PRIM_DB_BATCH_HEADERS}") + "${CODEGEN_INCDIR}/${PRIM_DB_BATCH_HEADERS}" + "${CODEGEN_INCDIR}/${CM_PRIM_DB}" + "${CODEGEN_INCDIR}/${CM_PRIM_DB_BATCH_HEADERS}") set(CODEGEN_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/primitive_db_gen.py") # Helping with some generators. @@ -52,6 +61,22 @@ add_custom_command(OUTPUT "${CODEGEN_INCDIR}/${PRIM_DB}" COMMENT "Updating file if the file changed (${PRIM_DB}) ..." ) +add_custom_command(OUTPUT "${CODEGEN_CACHE_DIR}/${CM_PRIM_DB}" + COMMAND "${Python3_EXECUTABLE}" "${CODEGEN_SCRIPT}" -out_path "${CODEGEN_CACHE_DIR}" + -out_file_name_prim_db "${CM_PRIM_DB}" + -out_file_name_batch_headers "${CM_PRIM_DB_BATCH_HEADERS}" + -kernels "${CMAKE_CURRENT_SOURCE_DIR}/cm_kernels" -cm + DEPENDS ${CM_KERNELS} "${CODEGEN_SCRIPT}" "${CODEGEN_INCDIR}/${PRIM_DB}" + COMMENT "Generating ${CODEGEN_CACHE_DIR}/${CM_PRIM_DB} ..." +) + +add_custom_command(OUTPUT "${CODEGEN_INCDIR}/${CM_PRIM_DB}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${CODEGEN_CACHE_DIR}/${CM_PRIM_DB}" "${CODEGEN_INCDIR}/${CM_PRIM_DB}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${CODEGEN_CACHE_DIR}/${CM_PRIM_DB_BATCH_HEADERS}" "${CODEGEN_INCDIR}/${CM_PRIM_DB_BATCH_HEADERS}" + DEPENDS "${CODEGEN_CACHE_DIR}/${CM_PRIM_DB}" "${CM_KERNELS}" "${CODEGEN_SCRIPT}" "${CODEGEN_INCDIR}/${PRIM_DB}" + COMMENT "Updating file if the file changed (${CM_PRIM_DB}) ..." +) + add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${CODEGEN_CACHE_SOURCES}) if(NOT BUILD_SHARED_LIBS) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl index 542fa69ebc241b..109fa2de9841aa 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl @@ -122,8 +122,8 @@ KERNEL(convolution_b_fs_yx_fsv16_1x1)( { #endif // SLM_DIV_FACTOR > 1 vec_t src = 0; -#if INPUT_LEFTOVERS - if ((k + 1) * FEATURE_SLICE_SIZE >= INPUT0_FEATURE_NUM) + + if (INPUT_LEFTOVERS && ((k + 1) * FEATURE_SLICE_SIZE >= INPUT0_FEATURE_NUM)) { if (k * FEATURE_SLICE_SIZE + sglid < INPUT0_FEATURE_NUM) { @@ -143,7 +143,6 @@ KERNEL(convolution_b_fs_yx_fsv16_1x1)( } } else -#endif // INPUT_LEFTOVERS { #if PADDED_INPUT #if X_BLOCK_SIZE > 1 diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl index 01c8e8853e350d..6a5c9e54a8e904 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl @@ -601,8 +601,10 @@ inline void FUNC(fc_bf_tiled_kernel_default)( #endif #if TILE_OFM > 1 ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += ((ACCUMULATOR_TYPE*)(&acc_tmp[bi]))[fi] * ds; + acc_tmp[bi][fi] = 0; #else acc[bi] += acc_tmp[bi] * ds; + acc_tmp[bi] = 0; #endif } } @@ -972,7 +974,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( // 
===================================================================================================================================== // Main computation loop const uint iterations = MAIN_LOOP_ELEMENTS_COUNT / TILE_IFM_ELEMENTS_SIZE; // TILE_IFM_ELEMENTS_SIZE : (TILE_IFM * SIMD) - // Each sub-group loads 2 Batch + // Each sub-group loads 2 Batch uint idx_sglid = (sglid * TILE_K) % TILE_IFM_ELEMENTS_SIZE; // same index for sglid 0~7 : to tile_k direction uint batch_sglid = (sglid * TILE_K) / TILE_IFM_ELEMENTS_SIZE; // 0 to 1 : to batch direction diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/fully_connected_gpu_bf_tiled_common.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/fully_connected_gpu_bf_tiled_common.cl index ca5c1ea3646d02..3f5796a30933ac 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/fully_connected_gpu_bf_tiled_common.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/fully_connected_gpu_bf_tiled_common.cl @@ -147,9 +147,7 @@ inline void (FUNC_NAME)( // NOTE: Manually unrolling multiplication loop leads to lower register pressure and allows for bigger block sizes, // but significantly degrades readability and generality of code. // It doesn't also show noticable performance improvement on tested configurations. - #if DECOMPRESSION_SCALE_POST_OP - ACCUMULATOR_VEC_TYPE acc_tmp[FORCED_TILE_B] = { }; - #endif + ACCUMULATOR_VEC_TYPE acc_tmp[FORCED_TILE_B] = { }; unroll_for(uint ki = 0; ki < (TILE_IFM * SIMD) / TILE_K; ++ki) { #if COMPRESSED_WEIGHTS_INT4 @@ -201,11 +199,7 @@ inline void (FUNC_NAME)( unroll_for (uint bi = 0; bi < FORCED_TILE_B; ++bi) { INPUT0_TYPE in_val = _sub_group_shuffle(((INPUT0_TYPE*)(&in_0[bi]))[total_k / SIMD], total_k % SIMD); unroll_for (uint fi = 0; fi < TILE_OFM; ++fi) { -#if DECOMPRESSION_SCALE_POST_OP ((ACCUMULATOR_TYPE*)(&acc_tmp[bi]))[fi] += in_val * ((ACCUMULATOR_TYPE*)(&wei))[W_IDX]; -#else - ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += in_val * ((ACCUMULATOR_TYPE*)(&wei))[W_IDX]; -#endif } } } @@ -240,9 +234,20 @@ inline void (FUNC_NAME)( ACCUMULATOR_TYPE ds = d_scales[fi % DECOMPRESSION_SCALE_LENGTH]; #endif ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += ((ACCUMULATOR_TYPE*)(&acc_tmp[bi]))[fi] * ds; + acc_tmp[bi][fi] = 0; } } #endif + +#if !DECOMPRESSION_SCALE_POST_OP + unroll_for (uint bi = 0; bi < FORCED_TILE_B; ++bi) { + unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { + ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += ((ACCUMULATOR_TYPE*)(&acc_tmp[bi]))[fi]; + } + } +#endif + + } // ===================================================================================================================================== // Leftovers diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_sdpa_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_sdpa_opt.cl index 00c43829d02ea7..7e960afa4b87d3 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_sdpa_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pa_sdpa_opt.cl @@ -44,6 +44,10 @@ KERNEL(pa_sdpa_opt)( const __global ALIBI_INPUT_TYPE* alibi_slopes, #endif __global OUTPUT_TYPE* output, +#if PAGED_ATTENTION_SCORES_OUTPUT + __global SOFTMAX_ACCUMULATOR_TYPE* softmax_results, + const __global int* subsequence_offsets, +#endif __global SOFTMAX_ACCUMULATOR_TYPE* exp_sums, __global SOFTMAX_ACCUMULATOR_TYPE* max_logits, __global OUTPUT_TYPE* tmp_out @@ -276,6 +280,28 @@ KERNEL(pa_sdpa_opt)( const uint max_logits_offset = exp_sums_offset; max_logits[max_logits_offset] = qk_max; } + +#if 
PAGED_ATTENTION_SCORES_OUTPUT
+#if MULTI_TOKENS_PROCESSING
+        const uint subsequence_idx = gws_subseq_mapping[seq_idx];
+        const uint subsequence_start_pos = subsequence_begins[subsequence_idx];
+        const uint subsequence_end_pos = subsequence_begins[subsequence_idx + 1];
+        const bool save_softmax_results = seq_idx == subsequence_end_pos - 1;
+#else
+        const uint subsequence_idx = seq_idx;
+        const bool save_softmax_results = true;
+#endif // MULTI_TOKENS_PROCESSING
+        // PagedAttention is supposed to save only the last "row" of the QK matrix multiplication,
+        // so save SEQ_LEN_PARTITION_SIZE elements for each partition
+        if (save_softmax_results) {
+            const uint output_offset = subsequence_idx * HEADS_NUM * total_partitions_num * SEQ_LEN_PARTITION_SIZE +
+                                       head_num_idx * total_partitions_num * SEQ_LEN_PARTITION_SIZE +
+                                       partition_idx * SEQ_LEN_PARTITION_SIZE;
+            for (uint i = sgid * SUBGROUP_SIZE + sglid; i < SEQ_LEN_PARTITION_SIZE; i += SUBGROUPS_PER_WG * SUBGROUP_SIZE) {
+                softmax_results[output_offset + i] = slm_qk_vals[i];
+            }
+        }
+#endif // PAGED_ATTENTION_SCORES_OUTPUT
     }
 }

@@ -370,6 +396,10 @@ KERNEL(pa_sdpa_finalization_stage)(
     const __global INPUT6_TYPE* subsequence_begins,
 #endif
     __global OUTPUT_TYPE* output,
+#if PAGED_ATTENTION_SCORES_OUTPUT
+    __global SOFTMAX_ACCUMULATOR_TYPE* softmax_results,
+    const __global int* subsequence_offsets,
+#endif
     const __global SOFTMAX_ACCUMULATOR_TYPE* exp_sums,
     const __global SOFTMAX_ACCUMULATOR_TYPE* max_logits,
     const __global OUTPUT_TYPE* tmp_out,
@@ -500,3 +530,155 @@
 }

 #endif
+
+#ifdef SDPA_STAGE_2
+#define MAX_PARTITIONS_NUM 128
+
+REQD_SUB_GROUP_SIZE(SUBGROUP_SIZE)
+KERNEL(pa_sdpa_scores_calculation)(
+    const __global INPUT3_TYPE* past_lens,
+    const __global INPUT6_TYPE* subsequence_begins,
+    __global OUTPUT1_TYPE* scores_output,
+    const __global SOFTMAX_ACCUMULATOR_TYPE* softmax_output,
+    const __global int* subsequence_offsets,
+    const __global SOFTMAX_ACCUMULATOR_TYPE* exp_sums,
+    const __global SOFTMAX_ACCUMULATOR_TYPE* max_logits,
+    const __global OUTPUT_TYPE* tmp_out,
+    const uint is_mixed_mode) {
+    const uint subsequence_idx = get_global_id(2);
+    const uint partition_global_idx = get_global_id(0);
+    const uint local_id = get_local_id(0);
+    const uint partition_idx = get_group_id(0);
+    const uint partition_size = get_local_size(0);
+    const uint max_seq_len = get_global_size(0);
+    const uint partitions_num = get_num_groups(0);
+    const uint sgid = get_sub_group_id();
+    const uint sgid_num = get_num_sub_groups();
+    const uint sglid = get_sub_group_local_id();
+
+    const int subsequence_begin = subsequence_begins[subsequence_idx];
+    const int subsequence_end = subsequence_begins[subsequence_idx + 1];
+    const uint seq_len = (subsequence_end - subsequence_begin) + past_lens[subsequence_idx];
+
+    const uint num_of_partitions = CEIL_DIV(seq_len, partition_size);
+
+    if (partition_idx >= num_of_partitions)
+        return;
+
+    __local SOFTMAX_ACCUMULATOR_TYPE slm_exp_sums[HEADS_NUM];
+    __local SOFTMAX_ACCUMULATOR_TYPE slm_global_exp_sum[HEADS_NUM];
+
+    SOFTMAX_ACCUMULATOR_TYPE total_score = SOFTMAX_ACCUMULATOR_VAL_ZERO;
+    if (seq_len <= partition_size) {
+        // If seq_len is less than the partition size, just reduce the results over the heads
+        for (uint head_idx = 0; head_idx < HEADS_NUM; head_idx++) {
+            const uint input_offset = subsequence_idx * HEADS_NUM * max_seq_len + head_idx * max_seq_len + partition_global_idx;
+            SOFTMAX_ACCUMULATOR_TYPE softmax_value = softmax_output[input_offset];
+            total_score += softmax_value;
+        }
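+        // With a single partition the partition-local softmax already covers the
+        // whole sequence, so no exp_sums/max_logits rescaling is needed here and
+        // the per-token score is simply the sum of the softmax values over heads.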
+    } else if (seq_len <= partition_size * MAX_PARTITIONS_NUM) {
+        // Optimized version for longer prompts (up to partition_size * MAX_PARTITIONS_NUM, ~64K tokens)
+
+        // Depending on the previous kernel, exp_sums and max_logits might have a different structure:
+        // For ordinary 1st and 2nd token kernels, there is only a single entry per subsequence.
+        // However, for mixed mode execution, exp_sums and max_logits include information for all
+        // tokens of each subsequence, but only the last one is needed for score calculation.
+        const uint subsequence_pos = is_mixed_mode ? subsequence_end - 1 : subsequence_idx;
+
+        for (uint head_idx = sgid; head_idx < HEADS_NUM; head_idx += sgid_num) {
+            SOFTMAX_ACCUMULATOR_TYPE max_logit[MAX_PARTITIONS_NUM / SUBGROUP_SIZE];
+            SOFTMAX_ACCUMULATOR_TYPE exp_sum[MAX_PARTITIONS_NUM / SUBGROUP_SIZE];
+
+            const uint exp_sums_offset = subsequence_pos * HEADS_NUM * partitions_num + head_idx * partitions_num;
+            for (int i = 0; i < partitions_num / SUBGROUP_SIZE; i++) {
+                max_logit[i] = max_logits[exp_sums_offset + i * SUBGROUP_SIZE + sglid];
+                exp_sum[i] = exp_sums[exp_sums_offset + i * SUBGROUP_SIZE + sglid];
+            }
+
+            const uint partitions_leftovers = partitions_num % SUBGROUP_SIZE;
+            if (partitions_leftovers != 0) {
+                const uint idx = partitions_num / SUBGROUP_SIZE;
+                max_logit[idx] = sglid >= partitions_leftovers ? SOFTMAX_ACCUMULATOR_VAL_MIN : max_logits[exp_sums_offset + idx * SUBGROUP_SIZE + sglid];
+                exp_sum[idx] = sglid >= partitions_leftovers ? SOFTMAX_ACCUMULATOR_VAL_ZERO : exp_sums[exp_sums_offset + idx * SUBGROUP_SIZE + sglid];
+            }
+
+            SOFTMAX_ACCUMULATOR_TYPE global_max_logit = max_logit[0];
+            for (uint i = 1; i < CEIL_DIV(partitions_num, SUBGROUP_SIZE); i++) {
+                global_max_logit = SOFTMAX_ACCUMULATOR_MAX_FUNC(global_max_logit, max_logit[i]);
+            }
+
+            global_max_logit = sub_group_reduce_max(global_max_logit);
+
+            SOFTMAX_ACCUMULATOR_TYPE global_exp_sum = SOFTMAX_ACCUMULATOR_VAL_ZERO;
+            for (uint i = 0; i < CEIL_DIV(partitions_num, SUBGROUP_SIZE); i++) {
+                SOFTMAX_ACCUMULATOR_TYPE adjusted_exp_sum = exp_sum[i] * native_exp(max_logit[i] - global_max_logit);
+                // slm_exp_sums[head_idx][i * SUBGROUP_SIZE + sglid] = adjusted_exp_sum;
+                if (i * SUBGROUP_SIZE + sglid == partition_idx)
+                    slm_exp_sums[head_idx] = adjusted_exp_sum;
+                global_exp_sum += adjusted_exp_sum;
+            }
+
+            global_exp_sum = sub_group_reduce_add(global_exp_sum);
+
+            slm_global_exp_sum[head_idx] = global_exp_sum;
+        }
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        for (uint head_idx = 0; head_idx < HEADS_NUM; head_idx++) {
+            SOFTMAX_ACCUMULATOR_TYPE adjusted_exp_sum = slm_exp_sums[head_idx];
+            SOFTMAX_ACCUMULATOR_TYPE global_exp_sum = slm_global_exp_sum[head_idx];
+
+            const uint input_offset = subsequence_idx * HEADS_NUM * max_seq_len + head_idx * max_seq_len + partition_global_idx;
+            SOFTMAX_ACCUMULATOR_TYPE softmax_value = softmax_output[input_offset];
+
+            softmax_value = softmax_value * adjusted_exp_sum / global_exp_sum;
+            total_score += softmax_value;
+        }
+    } else {
+        // Non-optimized fallback version
+        const uint subsequence_pos = is_mixed_mode ? subsequence_end - 1 : subsequence_idx;
+        for (uint head_idx = 0; head_idx < HEADS_NUM; head_idx++) {
+            SOFTMAX_ACCUMULATOR_TYPE global_max_logit = SOFTMAX_ACCUMULATOR_VAL_MIN;
+            const uint max_logits_base_offset = subsequence_pos * HEADS_NUM * partitions_num + head_idx * partitions_num;
+            for (uint i = 0; i < CEIL_DIV(partitions_num, SUBGROUP_SIZE); i++) {
+                const uint partition_offset = i * SUBGROUP_SIZE + sglid;
+                SOFTMAX_ACCUMULATOR_TYPE max_logit = partition_offset >= partitions_num ? SOFTMAX_ACCUMULATOR_VAL_MIN : max_logits[max_logits_base_offset + partition_offset];
+                global_max_logit = SOFTMAX_ACCUMULATOR_MAX_FUNC(global_max_logit, max_logit);
+            }
+
+            global_max_logit = sub_group_reduce_max(global_max_logit);
+
+            SOFTMAX_ACCUMULATOR_TYPE global_exp_sum = SOFTMAX_ACCUMULATOR_VAL_ZERO;
+            SOFTMAX_ACCUMULATOR_TYPE partition_adjusted_exp_sum = SOFTMAX_ACCUMULATOR_VAL_ZERO;
+            const uint exp_sums_base_offset = subsequence_pos * HEADS_NUM * partitions_num + head_idx * partitions_num;
+            for (uint i = 0; i < CEIL_DIV(partitions_num, SUBGROUP_SIZE); i++) {
+                const uint partition_offset = i * SUBGROUP_SIZE + sglid;
+                SOFTMAX_ACCUMULATOR_TYPE exp_sum = partition_offset >= partitions_num ? SOFTMAX_ACCUMULATOR_VAL_ZERO : exp_sums[exp_sums_base_offset + partition_offset];
+                SOFTMAX_ACCUMULATOR_TYPE max_logit = partition_offset >= partitions_num ? SOFTMAX_ACCUMULATOR_VAL_MIN : max_logits[max_logits_base_offset + partition_offset];
+                SOFTMAX_ACCUMULATOR_TYPE adjusted_exp_sum = exp_sum * native_exp(max_logit - global_max_logit);
+                global_exp_sum += adjusted_exp_sum;
+
+                // Save and broadcast the adjusted exp_sum for the partition currently being processed
+                if (i == partition_idx / SUBGROUP_SIZE)
+                    partition_adjusted_exp_sum = sub_group_broadcast(adjusted_exp_sum, partition_idx % SUBGROUP_SIZE);
+            }
+
+            global_exp_sum = sub_group_reduce_add(global_exp_sum);
+
+            const uint input_offset = subsequence_idx * HEADS_NUM * max_seq_len + head_idx * max_seq_len + partition_global_idx;
+            SOFTMAX_ACCUMULATOR_TYPE softmax_value = softmax_output[input_offset];
+
+            softmax_value = softmax_value * partition_adjusted_exp_sum / global_exp_sum;
+            total_score += softmax_value;
+        }
+    }
+
+    const uint output_offset = subsequence_offsets[subsequence_idx];
+    if (partition_global_idx < seq_len) {
+        scores_output[output_offset + partition_global_idx] = total_score;
+    }
+}
+
+#undef MAX_PARTITIONS_NUM
+#endif
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx.cl
index 95f0d0ff399a3b..ee27d220e30ce9 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx.cl
@@ -66,10 +66,7 @@ KERNEL (reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx)(
 #if (TILE_SIZE == DEFAULT_TILE_SIZE)
-    // read
-    INPUTVTYPE read_data = AS_INPUTVTYPE(_sub_group_block_read8((const __global uint*)(input) + input_idx_tile));
-
-    // write
+    // write index
     const uint output_idx = OUTPUT_GET_TILED_INDEX(OUTPUT_TILED_ORDER);

     if (F_NO_REMAINDER_CONDITION
@@ -79,13 +76,25 @@
     ) {
 #ifdef X_REMAINDER_SIZE
         if (X_REMAINDER_CONDITION) {
+            // read
+            INPUTVTYPE read_data;
+            for (int j = 0; j < X_REMAINDER_SIZE; ++j) {
+                read_data[j] = AS_INPUT0_TYPE(_sub_group_block_read((const __global uint*)(input) + input_idx_tile + j *
DEFAULT_STRIDE)); + } + // write for (int i = 0 ; i < X_REMAINDER_SIZE; i++) { output[output_idx + i] = TO_OUTPUT_TYPE(read_data[i]); } } else { + // read + INPUTVTYPE read_data = AS_INPUTVTYPE(_sub_group_block_read8((const __global uint*)(input) + input_idx_tile)); + // write VSTORE(TO_OUTPUTVTYPE(read_data), 0, output + output_idx); } #else + // read + INPUTVTYPE read_data = AS_INPUTVTYPE(_sub_group_block_read8((const __global uint*)(input) + input_idx_tile)); + // write VSTORE(TO_OUTPUTVTYPE(read_data), 0, output + output_idx); #endif } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_bfyx_to_blocked_format.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_bfyx_to_blocked_format.cl index 45d0ccc5c0933e..2f403b798dea39 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_bfyx_to_blocked_format.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_bfyx_to_blocked_format.cl @@ -26,6 +26,18 @@ } \ } +#define FUNC_LOAD_LEFTOVERS(inner, outer) unroll_for (uint lh = 0; lh < outer; ++lh) { \ + const uint input_idx = INPUT0_GET_TILED_INDEX(INPUT0_TILED_ORDER); \ + INPUTVTYPE read_data; \ + unroll_for (uint lw = 0; lw < inner; ++lw) { \ + read_data[lw] = input[input_idx + lw]; \ + } \ + unroll_for (uint lw = 0; lw < inner; ++lw) { \ + const uint dst = local_buf_offset + lw; \ + transpose_buf[dst][lh] = read_data[lw]; \ + } \ + } + #define FUNC_VSTORE(loop) unroll_for (uint lw = 0; lw < loop; ++lw) { \ const uint output_idx = output_idx_tile + (lw * x_pitch); \ VSTORE(TO_OUTPUTVTYPE(transpose_buf[local_buf_offset + lw]), 0, output + output_idx); \ @@ -109,7 +121,15 @@ KERNEL (reorder_data_bfyx_to_blocked_format)( if (F_NO_REMAINDER_CONDITION) { // read and transpose +#ifdef X_REMAINDER_CONDITION + if (X_NO_REMAINDER_CONDITION) { + FUNC_VLOAD(TILE_SIZE, TILE_SIZE) + } else { + FUNC_LOAD_LEFTOVERS(X_REMAINDER_SIZE, TILE_SIZE) + } +#else FUNC_VLOAD(TILE_SIZE, TILE_SIZE) +#endif // write to ddr #ifdef X_REMAINDER_CONDITION @@ -125,7 +145,15 @@ KERNEL (reorder_data_bfyx_to_blocked_format)( #ifdef F_REMAINDER_CONDITION else if (F_REMAINDER_CONDITION) { // read and transpose + #ifdef X_REMAINDER_CONDITION + if (X_NO_REMAINDER_CONDITION) { + FUNC_VLOAD(TILE_SIZE, F_REMAINDER_SIZE) + } else { + FUNC_LOAD_LEFTOVERS(X_REMAINDER_SIZE, F_REMAINDER_SIZE) + } + #else FUNC_VLOAD(TILE_SIZE, F_REMAINDER_SIZE) + #endif // write to ddr #ifdef X_REMAINDER_CONDITION diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_opt.cl index 55f87e4189d9fe..cddafe62623d9e 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_opt.cl @@ -842,6 +842,14 @@ KERNEL(sdpa_opt)( const __global int* blocked_indexes_start, const __global int* blocked_indexes_end, const __global int* gws_seq_indexes_correspondence +#if PAGED_ATTENTION_SCORES_OUTPUT + , __global SOFTMAX_ACCUMULATOR_TYPE* softmax_results + , const __global int* subsequence_offsets + , __global SOFTMAX_ACCUMULATOR_TYPE* exp_sums + , __global SOFTMAX_ACCUMULATOR_TYPE* max_logits + , __global OUTPUT_TYPE* tmp_out + , const uint aligned_max_context_len +#endif #else __global SOFTMAX_ACCUMULATOR_TYPE* exp_sums, __global SOFTMAX_ACCUMULATOR_TYPE* max_logits, @@ -1222,6 +1230,39 @@ KERNEL(sdpa_opt)( slm_qk_vals[sglid * SEQ_LEN_PARTITION_SIZE + sgid * TARGET_SEQ_LEN_BLOCK_SIZE + i] = qk_acc[i]; } +#if 
PAGED_ATTENTION_SCORES_OUTPUT
+        const uint subsequence_idx = gws_seq_indexes_correspondence[target_seq_dim];
+        const uint subsequence_end_pos = subsequence_begins[subsequence_idx + 1];
+        const uint block_start_pos = blocked_indexes_start[target_seq_dim];
+        const uint block_end_pos = blocked_indexes_end[target_seq_dim];
+
+        // PagedAttention is supposed to save only the last "row" of the QK matrix multiplication,
+        // so save SEQ_LEN_PARTITION_SIZE elements for each partition
+        if (subsequence_end_pos == block_end_pos) {
+            const uint last_row_idx = block_end_pos - block_start_pos - 1;
+            if (sglid == last_row_idx) {
+                const uint partition_idx = start_partition_idx / SEQ_LEN_PARTITION_SIZE;
+
+                if (sgid == 0) {
+                    const uint max_partitions_num = aligned_max_context_len / SEQ_LEN_PARTITION_SIZE;
+                    const uint exp_sums_output_offset = subsequence_idx * NUM_HEADS * max_partitions_num +
+                                                        num_heads_dim * max_partitions_num +
+                                                        partition_idx;
+                    exp_sums[exp_sums_output_offset] = exp_sum_new;
+                    max_logits[exp_sums_output_offset] = qk_max_new;
+                }
+
+                const uint output_offset = subsequence_idx * NUM_HEADS * aligned_max_context_len +
+                                           num_heads_dim * aligned_max_context_len +
+                                           partition_idx * SEQ_LEN_PARTITION_SIZE + sgid * TARGET_SEQ_LEN_BLOCK_SIZE;
+                for (uint i = 0; i < TARGET_SEQ_LEN_BLOCK_SIZE; i++) {
+                    softmax_results[output_offset + i] = qk_acc[i];
+                }
+
+            }
+        }
+#endif
+
     barrier(CLK_LOCAL_MEM_FENCE);
 }
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cm_kernels/example.cpp b/src/plugins/intel_gpu/src/kernel_selector/cm_kernels/example.cpp
new file mode 100644
index 00000000000000..abee70f6483d17
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/cm_kernels/example.cpp
@@ -0,0 +1,26 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+namespace KERNEL_NAME {
+
+#include "include/example_header.h"
+
+extern "C" _GENX_MAIN_ void KERNEL_NAME(svmptr_t x [[type("svmptr_t")]]) {
+    // This kernel prints and exits
+    if (cm_linear_global_id() == 0) {
+        printf("Example CM kernel\n");
+        printf("Pointer address: %p\n", (void*)x);
+
+        // Call function from header
+        print_lws_gws();
+
+        // Check macro from batch header
+#ifdef EXAMPLE_CM_MACRO
+        printf("Batch header included\n");
+#else
+        printf("Batch header not included\n");
+#endif
+    }
+}
+} // namespace KERNEL_NAME
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cm_kernels/include/batch_headers/example_batch_header.h b/src/plugins/intel_gpu/src/kernel_selector/cm_kernels/include/batch_headers/example_batch_header.h
new file mode 100644
index 00000000000000..f3f2aa183e88dc
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/cm_kernels/include/batch_headers/example_batch_header.h
@@ -0,0 +1,5 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#define EXAMPLE_CM_MACRO
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cm_kernels/include/example_header.h b/src/plugins/intel_gpu/src/kernel_selector/cm_kernels/include/example_header.h
new file mode 100644
index 00000000000000..3ce3a33188d0fc
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/cm_kernels/include/example_header.h
@@ -0,0 +1,8 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+inline void print_lws_gws() {
+    printf("lws: %d, %d, %d\n", cm_local_size(0), cm_local_size(1), cm_local_size(2));
+    printf("gws: %d, %d, %d\n", cm_group_count(0), cm_group_count(1), cm_group_count(2));
+}
diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/kernel_base_cm.h b/src/plugins/intel_gpu/src/kernel_selector/kernel_base_cm.h new file mode 100644 index 00000000000000..32744f65bee7e0 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_base_cm.h @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_base.h" + +namespace kernel_selector { + +class KernelBaseCM : public KernelBase { +public: + using KernelBase::KernelBase; + virtual ~KernelBaseCM() {} + +protected: + virtual bool Validate(const Params&) const { + return true; + } + std::shared_ptr GetKernelString(const std::string& kernel_name, + const std::pair& jit, + const std::string& entry_point) const { + std::shared_ptr kernel_string = std::make_shared(); + + bool is_cm = true; + auto codes = db.get(kernel_name, is_cm); + + if (codes.size()) { + kernel_string->str = codes[0]; + kernel_string->jit = "#include \n#include \n"; + kernel_string->jit += jit.first; + kernel_string->undefs = jit.second; + kernel_string->options = " -cmc "; + + kernel_string->entry_point = entry_point; + kernel_string->batch_compilation = true; + kernel_string->language = KernelLanguage::CM; + } + + return kernel_string; + } +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.h b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.h index d9b132ac1dcc43..b55740110b2f28 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.h @@ -46,6 +46,7 @@ namespace kernel_selector { std::string GetStringEnv(const char* varName); +using KernelLanguage = cldnn::kernel_language; using KernelString = cldnn::kernel_string; using WorkGroupSizes = cldnn::work_group_sizes; using ScalarDescriptor = cldnn::scalar_desc; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp index 6fd074f8d8506d..7150d51ecf1e48 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp @@ -264,6 +264,8 @@ JitConstants ConvolutionKernel_b_fs_yx_fsv16_1x1::GetJitConstants(const convolut } if (params.inputs[0].Feature().v % tuning_data.feature_block_size != 0) { jit.AddConstant(MakeJitConstant("INPUT_LEFTOVERS", 1)); + } else { + jit.AddConstant(MakeJitConstant("INPUT_LEFTOVERS", 0)); } } else { DimensionAccessHelperJit input0_dims(params.inputs[0]); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_example.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_example.cpp new file mode 100644 index 00000000000000..32719501d937d2 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_example.cpp @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "fully_connected_cm_example.h" + +namespace kernel_selector { +KernelsData FullyConnected_cm_example::GetKernelsData(const Params& params) const { + if (!Validate(params)) { + return {}; + } + auto options = std::string(" -Qxcm_jit_option=-DPASTokenReduction "); + 
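+    // Note (assumption, inferred from cm_kernels/example.cpp): the CM source wraps
+    // the kernel body in `namespace KERNEL_NAME`; the jit pair built below defines
+    // KERNEL_NAME to the chosen entry point before compilation and undefines it
+    // afterwards, so the same source can presumably be instantiated under
+    // different entry-point names.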
+ KernelData kd = KernelData::Default(params, 1); + auto& kernel = kd.kernels[0]; + + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.workGroups.local = {1, 2, 4}; + kernel.params.workGroups.global = {1, 4, 8}; + + std::string kernel_name = "fully_connected_cm_example"; + auto jit = std::pair("\n#define KERNEL_NAME " + kernel_name, "#undef KERNEL_NAME"); + kernel.code.kernelString = GetKernelString("example", jit, kernel_name); + kernel.code.kernelString->options += options; + kernel.code.kernelString->batch_compilation = true; + return {kd}; +} +KernelsPriority FullyConnected_cm_example::GetKernelsPriority(const Params& params) const { + return TUTORIAL_PRIORITY; +} +ParamsKey FullyConnected_cm_example::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F16); + k.EnableInputWeightsType(WeightsType::F16); + k.EnableInputWeightsType(WeightsType::UINT8); + k.EnableAllInputLayout(); + k.EnableAllOutputLayout(); + k.EnableDifferentInputWeightsTypes(); + k.EnableDifferentTypes(); + k.EnableBiasPerOutput(); + k.EnableBiasPerFeature(); + k.EnableNonBiasTerm(); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableQuantization(QuantizationType::SYMMETRIC); + k.EnableWeightsCompression(); + return k; +} +bool FullyConnected_cm_example::Validate(const Params& p) const { + return true; +} +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_example.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_example.h new file mode 100644 index 00000000000000..844f3395bd8430 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_example.h @@ -0,0 +1,21 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "../fully_connected_params.h" +#include "kernel_base_cm.h" + +namespace kernel_selector { +class FullyConnected_cm_example : public KernelBaseCM { +public: + FullyConnected_cm_example() : KernelBaseCM("fully_connected_example") {} + virtual ~FullyConnected_cm_example() {} + + KernelsData GetKernelsData(const Params& params) const override; + KernelsPriority GetKernelsPriority(const Params& params) const override; + ParamsKey GetSupportedKey() const override; + bool Validate(const Params& p) const override; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_kernel_selector.cpp new file mode 100644 index 00000000000000..dfc6d4342b1490 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_kernel_selector.cpp @@ -0,0 +1,17 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "fully_connected_cm_kernel_selector.h" + +#include "fully_connected_cm_example.h" + +namespace kernel_selector { +fully_connected_cm_kernel_selector::fully_connected_cm_kernel_selector() { + Attach(); +} + +KernelsData fully_connected_cm_kernel_selector::GetBestKernels(const Params& params) const { + return GetAutoTuneBestKernel(params, KernelType::FULLY_CONNECTED); +} +} // namespace kernel_selector diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_kernel_selector.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_kernel_selector.h new file mode 100644 index 00000000000000..937d605f9ebad2 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/cm/fully_connected_cm_kernel_selector.h @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_selector.h" + +namespace kernel_selector { + +class fully_connected_cm_kernel_selector : public kernel_selector_base { +public: + static fully_connected_cm_kernel_selector& Instance() { + static fully_connected_cm_kernel_selector instance_; + return instance_; + } + + fully_connected_cm_kernel_selector(); + + virtual ~fully_connected_cm_kernel_selector() {} + + KernelsData GetBestKernels(const Params& params) const override; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_kv_cache_update_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_kv_cache_update_kernel_ref.cpp index ddfb491f50278a..ce20f49de597ff 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_kv_cache_update_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_kv_cache_update_kernel_ref.cpp @@ -167,7 +167,7 @@ void KVCacheUpdateKernelRef::GetUpdateDispatchDataFunc(KernelData& kd) const { const auto indexes_dt = Datatype::INT32; const auto target_seq_len_block_size = 16; - const auto target_seq_len = prim_params.conf.paged_attention_aligned_seq_len; + const auto target_seq_len = std::max(prim_params.conf.paged_attention_aligned_seq_len, static_cast(1)); const auto indexes_buf_size = CeilDiv(target_seq_len, target_seq_len_block_size) * BytesPerElement(indexes_dt); kd.internalBufferSizes.clear(); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_sdpa_kernel_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_sdpa_kernel_opt.cpp index 63c5e74160f652..909a40d677f535 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_sdpa_kernel_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_sdpa_kernel_opt.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "sdpa_kernel_opt.h" #include "pa_sdpa_kernel_opt.h" #include "kernel_selector_params.h" @@ -15,6 +16,7 @@ enum KernelsTypes { MULTI_TOKENS, FINALIZATION, FINALIZATION_MULTI_TOKENS, + SCORES_CALCULATION, TOTAL_KERNELS_NUM }; @@ -35,6 +37,8 @@ static std::string GetKernelName(std::string base_name, KernelsTypes type) { kernel_name += "_finalization"; } else if (type == KernelsTypes::FINALIZATION_MULTI_TOKENS) { kernel_name += "_finalization_multi_tokens_seq"; + } else if (type == KernelsTypes::SCORES_CALCULATION) { + kernel_name += "_scores_calculation"; } return kernel_name; @@ -46,10 +50,15 @@ KernelsData PagedAttentionSDPAKernelOpt::GetKernelsData(const Params& p) const { } const auto& params = static_cast(p); - const std::vector kernels_type = { KernelsTypes::SINGLE_TOKEN, - KernelsTypes::MULTI_TOKENS, - KernelsTypes::FINALIZATION, - KernelsTypes::FINALIZATION_MULTI_TOKENS }; + std::vector kernels_type = { KernelsTypes::SINGLE_TOKEN, + KernelsTypes::MULTI_TOKENS, + KernelsTypes::FINALIZATION, + KernelsTypes::FINALIZATION_MULTI_TOKENS }; + + const auto has_scores_output = params.outputs.size() > 1; + if (has_scores_output) { + 
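+        // Note: KernelData::Default below sizes its kernel list from
+        // kernels_type.size(), so the SCORES_CALCULATION entry has to be
+        // appended before the KernelData is created.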
kernels_type.push_back(KernelsTypes::SCORES_CALCULATION); + } KernelData kd = KernelData::Default(params, kernels_type.size()); kd.needs_sub_kernels_sync = true; @@ -65,7 +74,8 @@ KernelsData PagedAttentionSDPAKernelOpt::GetKernelsData(const Params& p) const { const auto jit = CreateJit(kernel_name, jit_constants, entry_point); - size_t inputs_num = static_cast(params.inputs.size()); + int inputs_num = static_cast(params.inputs.size()); + int outputs_num = 1; if (kernel_type == KernelsTypes::SINGLE_TOKEN) { // SINGLE_TOKEN kernel doesn't use the subsequence_begins input inputs_num -= 1; @@ -75,6 +85,11 @@ KernelsData PagedAttentionSDPAKernelOpt::GetKernelsData(const Params& p) const { } else if (kernel_type == KernelsTypes::FINALIZATION_MULTI_TOKENS) { // FINALIZATION_MULTI_TOKENS kernel uses past_lens data input and subsequence_begins inputs_num = 2; + } else if (kernel_type == KernelsTypes::SCORES_CALCULATION) { + // SCORES_CALCULATION kernel uses past_lens data input and subsequence_begins + inputs_num = 2; + // Output is configured manually to use the second output memory buffer + outputs_num = 0; } auto& kernel = kd.kernels[kd_kernels_idx++]; @@ -87,19 +102,33 @@ KernelsData PagedAttentionSDPAKernelOpt::GetKernelsData(const Params& p) const { {}, false, false, - static_cast(inputs_num), + inputs_num, GetFusedPrimitiveInputsCount(params), - static_cast(params.outputs.size()), + outputs_num, params.is_shape_agnostic); - kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); - kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); - kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); + if (kernel_type == KernelsTypes::SCORES_CALCULATION) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 1}); + } + + uint32_t internal_buffers_num = 0; + if (has_scores_output) { + // Intermediate softmax results for scores output calculation and precalculated accumulated + // sequence length offsets for each subsequence + internal_buffers_num += 2; + } + + // Softmax's exp_sums, max_logits and intermediate output + internal_buffers_num += 3; if (kernel_type == KernelsTypes::MULTI_TOKENS || kernel_type == KernelsTypes::FINALIZATION_MULTI_TOKENS) { // MULTIPLE_TOKENS kernels needs additional information related to mapping // launched kernel instances to subsequence indexes - kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 3}); + internal_buffers_num++; + } + + for (uint32_t i = 0; i < internal_buffers_num; i++) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, i}); } if (kernel_type == KernelsTypes::FINALIZATION || kernel_type == KernelsTypes::FINALIZATION_MULTI_TOKENS) { @@ -108,6 +137,15 @@ KernelsData PagedAttentionSDPAKernelOpt::GetKernelsData(const Params& p) const { // Remove unused shape_info argument at finalization stage kernel.params.arguments.erase(kernel.params.arguments.begin()); } + + if (kernel_type == KernelsTypes::SCORES_CALCULATION) { + // The scores kernel needs to know if the current execution mode is mixed or ordinary + // to configure proper memory access + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0}); + + // Remove unused shape_info argument for scores kernel + kernel.params.arguments.erase(kernel.params.arguments.begin()); + } } return {kd}; @@ -173,7 +211,12 @@ JitConstants PagedAttentionSDPAKernelOpt::GetJitConstants(const pa_sdpa_params& 
jit.AddConstant(MakeJitConstant("BROADCAST_GROUP_SIZE", config.group_size)); } - auto sdpa_stage = kernel_idx == KernelsTypes::FINALIZATION || kernel_idx == KernelsTypes::FINALIZATION_MULTI_TOKENS ? 1 : 0; + auto sdpa_stage = 0; + if (kernel_idx == KernelsTypes::FINALIZATION || kernel_idx == KernelsTypes::FINALIZATION_MULTI_TOKENS) { + sdpa_stage = 1; + } else if (kernel_idx == KernelsTypes::SCORES_CALCULATION) { + sdpa_stage = 2; + } jit.AddConstant(MakeJitConstant("SDPA_STAGE_" + std::to_string(sdpa_stage), 1)); if (config.has_const_scale_val) { @@ -190,6 +233,10 @@ JitConstants PagedAttentionSDPAKernelOpt::GetJitConstants(const pa_sdpa_params& jit.Merge(MakeTypeJitConstants(params.inputs[alibi_input_idx].GetDType(), "ALIBI_INPUT")); } + if (params.outputs.size() > 1) { + jit.AddConstant(MakeJitConstant("PAGED_ATTENTION_SCORES_OUTPUT", 1)); + } + if (kernel_idx == KernelsTypes::MULTI_TOKENS || kernel_idx == KernelsTypes::FINALIZATION_MULTI_TOKENS) jit.AddConstant(MakeJitConstant("MULTI_TOKENS_PROCESSING", 1)); @@ -203,18 +250,36 @@ CommonDispatchData PagedAttentionSDPAKernelOpt::SetDefault(const pa_sdpa_params& const auto& input = params.inputs[0]; if (!input.is_dynamic()) { - const size_t sequences_number = input.Batch().v; - const size_t num_of_partitions = CeilDiv(params.max_context_len, seq_len_partition_size); + const size_t total_tokens = input.Batch().v; + const size_t num_of_partitions = CeilDiv(params.conf.paged_attention_max_len, seq_len_partition_size); const size_t heads_num = static_cast(params.conf.heads_num); const size_t head_size = static_cast(params.conf.head_size); - if (kernel_idx == 0) { - dispatch_data.gws = { sequences_number, + if (kernel_idx == KernelsTypes::SINGLE_TOKEN || kernel_idx == KernelsTypes::MULTI_TOKENS) { + dispatch_data.gws = { total_tokens, heads_num, head_size * num_of_partitions }; dispatch_data.lws = { 1, 1, head_size }; + } else if (kernel_idx == KernelsTypes::SCORES_CALCULATION) { + const auto& past_lens = params.inputs[3]; + const auto subsequences_number = past_lens.Batch().v; + + size_t partition_size = 0; + size_t num_of_partitions = 0; + if (params.stage == PagedAttentionStage::PREFILL) { + partition_size = SDPAKernelOpt::get_seq_len_partition_size(params, params.conf.head_size, 1); + } else { + partition_size = seq_len_partition_size; + } + + num_of_partitions = CeilDiv(params.conf.paged_attention_max_len, partition_size); + + dispatch_data.gws = { partition_size * num_of_partitions, + 1, + subsequences_number }; + dispatch_data.lws = { partition_size, 1, 1 }; } else { - dispatch_data.gws = { sequences_number, + dispatch_data.gws = { total_tokens, heads_num, head_size }; dispatch_data.lws = { 1, 1, subgroup_size }; @@ -228,30 +293,39 @@ void PagedAttentionSDPAKernelOpt::GetUpdateDispatchDataFunc(KernelData& kd) cons kd.update_dispatch_data_func = [](const Params& params, KernelData& kd) { const auto& prim_params = static_cast(params); - const size_t expected_kernels_num = 4; - OPENVINO_ASSERT(kd.kernels.size() == expected_kernels_num, "[GPU] Invalid kernels size for update dispatch data func of SDPA kernel"); + const auto has_scores_output = prim_params.outputs.size() > 1; + const auto expected_kernels_num = has_scores_output ? 
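
(Aside: concrete numbers make the SCORES_CALCULATION dispatch above easier to follow. A sketch with invented sizes — 1000 context tokens, a 256-element partition, 3 subsequences; CeilDiv redeclared locally:)

#include <array>
#include <cstddef>

static constexpr std::size_t CeilDiv(std::size_t a, std::size_t b) { return (a + b - 1) / b; }

int main() {
    const std::size_t partition_size = 256, max_len = 1000, subsequences = 3;
    const std::size_t partitions = CeilDiv(max_len, partition_size);                     // 4
    const std::array<std::size_t, 3> gws{partition_size * partitions, 1, subsequences};  // {1024, 1, 3}
    const std::array<std::size_t, 3> lws{partition_size, 1, 1};                          // one partition per work-group
    return gws[0] / lws[0] == partitions ? 0 : 1;  // 4 work-groups along axis 0 for each subsequence
}
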
KernelsTypes::TOTAL_KERNELS_NUM : KernelsTypes::TOTAL_KERNELS_NUM - 1; + OPENVINO_ASSERT(kd.kernels.size() == static_cast(expected_kernels_num), + "[GPU] Invalid kernels size for update dispatch data func of SDPA kernel"); + + const auto scores_calc_only = prim_params.stage == PagedAttentionStage::PREFILL && has_scores_output; + const auto multi_tokens_mode = prim_params.stage == PagedAttentionStage::MIXED; auto dispatch_data1 = SetDefault(prim_params, KernelsTypes::SINGLE_TOKEN); kd.kernels[KernelsTypes::SINGLE_TOKEN].params.workGroups.global = dispatch_data1.gws; kd.kernels[KernelsTypes::SINGLE_TOKEN].params.workGroups.local = dispatch_data1.lws; - kd.kernels[KernelsTypes::SINGLE_TOKEN].skip_execution = prim_params.multi_tokens_mode; + kd.kernels[KernelsTypes::SINGLE_TOKEN].skip_execution = multi_tokens_mode || scores_calc_only; kd.kernels[KernelsTypes::MULTI_TOKENS].params.workGroups.global = dispatch_data1.gws; kd.kernels[KernelsTypes::MULTI_TOKENS].params.workGroups.local = dispatch_data1.lws; - kd.kernels[KernelsTypes::MULTI_TOKENS].skip_execution = !prim_params.multi_tokens_mode; + kd.kernels[KernelsTypes::MULTI_TOKENS].skip_execution = !multi_tokens_mode || scores_calc_only; - const auto& input = prim_params.inputs[0]; - const size_t sequences_number = input.Batch().v; - const size_t num_of_partitions = CeilDiv(prim_params.max_context_len, seq_len_partition_size); + size_t partition_size = 0; + if (prim_params.stage == PagedAttentionStage::PREFILL) { + partition_size = SDPAKernelOpt::get_seq_len_partition_size(params, prim_params.conf.head_size, 1); + } else { + partition_size = seq_len_partition_size; + } + const size_t num_of_partitions = CeilDiv(prim_params.conf.paged_attention_max_len, partition_size); auto dispatch_data2 = SetDefault(prim_params, KernelsTypes::FINALIZATION); kd.kernels[KernelsTypes::FINALIZATION].params.workGroups.global = dispatch_data2.gws; kd.kernels[KernelsTypes::FINALIZATION].params.workGroups.local = dispatch_data2.lws; - kd.kernels[KernelsTypes::FINALIZATION].skip_execution = num_of_partitions == 1 || prim_params.multi_tokens_mode; + kd.kernels[KernelsTypes::FINALIZATION].skip_execution = num_of_partitions == 1 || multi_tokens_mode || scores_calc_only; kd.kernels[KernelsTypes::FINALIZATION_MULTI_TOKENS].params.workGroups.global = dispatch_data2.gws; kd.kernels[KernelsTypes::FINALIZATION_MULTI_TOKENS].params.workGroups.local = dispatch_data2.lws; - kd.kernels[KernelsTypes::FINALIZATION_MULTI_TOKENS].skip_execution = num_of_partitions == 1 || !prim_params.multi_tokens_mode; + kd.kernels[KernelsTypes::FINALIZATION_MULTI_TOKENS].skip_execution = num_of_partitions == 1 || !multi_tokens_mode || scores_calc_only; ScalarDescriptor num_of_partitions_scalar; num_of_partitions_scalar.t = ScalarDescriptor::Types::UINT32; @@ -261,23 +335,63 @@ void PagedAttentionSDPAKernelOpt::GetUpdateDispatchDataFunc(KernelData& kd) cons kd.kernels[KernelsTypes::FINALIZATION_MULTI_TOKENS].params.scalars.resize(1); kd.kernels[KernelsTypes::FINALIZATION_MULTI_TOKENS].params.scalars[0] = num_of_partitions_scalar; + if (has_scores_output) { + auto dispatch_data = SetDefault(prim_params, KernelsTypes::SCORES_CALCULATION); + kd.kernels[KernelsTypes::SCORES_CALCULATION].params.workGroups.global = dispatch_data.gws; + kd.kernels[KernelsTypes::SCORES_CALCULATION].params.workGroups.local = dispatch_data.lws; + kd.kernels[KernelsTypes::SCORES_CALCULATION].skip_execution = false; + + ScalarDescriptor is_mixed_mode; + is_mixed_mode.t = ScalarDescriptor::Types::UINT32; + is_mixed_mode.v.u32 = 
static_cast(multi_tokens_mode); + kd.kernels[KernelsTypes::SCORES_CALCULATION].params.scalars.resize(1); + kd.kernels[KernelsTypes::SCORES_CALCULATION].params.scalars[0] = is_mixed_mode; + } + + const auto& input = prim_params.inputs[0]; + const size_t total_tokens = input.Batch().v; + auto buf_dt_size = BytesPerElement(softmax_acc_dt); - auto buf_elements_count = sequences_number * prim_params.conf.heads_num * num_of_partitions; + auto buf_elements_count = total_tokens * prim_params.conf.heads_num * num_of_partitions; auto buf_size = buf_elements_count * buf_dt_size; auto tmp_out_dt_size = BytesPerElement(softmax_acc_dt); - auto tmp_out_elements_count = sequences_number * prim_params.conf.heads_num * prim_params.conf.head_size * num_of_partitions; + auto tmp_out_elements_count = total_tokens * prim_params.conf.heads_num * prim_params.conf.head_size * num_of_partitions; auto tmp_out_size = tmp_out_elements_count * tmp_out_dt_size; kd.internalBufferSizes.clear(); - kd.internalBufferSizes.push_back(buf_size); - kd.internalBufferSizes.push_back(buf_size); - kd.internalBufferSizes.push_back(tmp_out_size); + + if (has_scores_output) { + const auto& past_lens = prim_params.inputs[3]; + auto subsequences_number = past_lens.Batch().v; + auto softmax_buf_dt_size = BytesPerElement(softmax_acc_dt); + + auto softmax_buf_elements_count = subsequences_number * prim_params.conf.heads_num * num_of_partitions * partition_size; + auto softmax_buf_size = softmax_buf_elements_count * softmax_buf_dt_size; + + // Softmax intermediate output + kd.internalBufferSizes.push_back(softmax_buf_size); + // Precalculated accumulated sequence length offsets for each subsequence + kd.internalBufferSizes.push_back(subsequences_number * BytesPerElement(Datatype::INT32)); + + if (prim_params.stage == PagedAttentionStage::PREFILL) { + // Recalculate buf_size as in case of PREFILL stage it's not needed to allocate buffer per each input token + buf_elements_count = subsequences_number * prim_params.conf.heads_num * num_of_partitions; + buf_size = buf_elements_count * buf_dt_size; + + // Intermediate tmp output buffer is not used for PREFILL stage + tmp_out_size = tmp_out_dt_size; + } + } + + kd.internalBufferSizes.push_back(buf_size); // softmax exp_sums + kd.internalBufferSizes.push_back(buf_size); // softmax max_logits + kd.internalBufferSizes.push_back(tmp_out_size); // intermediate output kd.internalBufferDataType = softmax_acc_dt; - if (prim_params.multi_tokens_mode) { + if (multi_tokens_mode) { auto buf_dt_size = BytesPerElement(Datatype::INT32); - auto buf_elements_count = sequences_number; + auto buf_elements_count = total_tokens; auto buf_size = Align(buf_elements_count * buf_dt_size, BytesPerElement(softmax_acc_dt)); kd.internalBufferSizes.push_back(buf_size); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_sdpa_kernel_opt.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_sdpa_kernel_opt.h index a2456ccd9e2af5..a52571b03691df 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_sdpa_kernel_opt.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/pa_sdpa_kernel_opt.h @@ -9,11 +9,17 @@ namespace kernel_selector { +enum PagedAttentionStage { + GENERATE = 0, + PREFILL = 1, + MIXED = 2, + UNKNOWN = 3 +}; + struct pa_sdpa_params : base_params { pa_sdpa_params() : base_params(KernelType::PA_SDPA) {} - bool multi_tokens_mode = false; - size_t max_context_len = 0; + PagedAttentionStage stage = PagedAttentionStage::UNKNOWN; sdpa_configuration conf; }; diff --git 
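
(Aside: a sketch, with invented sizes, of how the three base internal buffers set up above scale: exp_sums and max_logits hold one F32 accumulator per (token, head, partition), and the intermediate output additionally multiplies by head_size:)

#include <cstddef>

int main() {
    const std::size_t total_tokens = 8, heads = 28, head_size = 128, partitions = 4;
    const std::size_t acc_bytes = 4;  // softmax_acc_dt == F32
    const std::size_t buf_size = total_tokens * heads * partitions * acc_bytes;  // exp_sums / max_logits each
    const std::size_t tmp_out_size = buf_size * head_size;                       // intermediate output
    return (buf_size > 0 && tmp_out_size > buf_size) ? 0 : 1;
}
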
a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.h index 5cd9c384ff2709..8fcc4a16692d6c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_base.h @@ -97,6 +97,7 @@ struct sdpa_configuration { bool is_paged_attention = false; int64_t paged_attention_aligned_seq_len = -1; int64_t paged_attention_block_size = 0; + int64_t paged_attention_max_len = 0; bool has_const_scale_val = false; float scale_val = 0.f; }; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp index 4e71064efbc895..4c23d4de4fd68d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp @@ -21,38 +21,11 @@ enum KernelsTypes { constexpr size_t subgroup_size = 16; } // namespace -static size_t get_sg_number_scale_factor(const sdpa_params& sdpa_params, size_t kernel_type) { - const size_t optimal_scale_factor = 2; - if (kernel_type == KernelsTypes::MULTI_TOKENS) { - if (sdpa_params.conf.head_size * optimal_scale_factor <= sdpa_params.engineInfo.maxWorkGroupSize) { - return optimal_scale_factor; - } - } else if (kernel_type == KernelsTypes::SINGLE_TOKEN) { - if (sdpa_params.conf.head_size * optimal_scale_factor <= sdpa_params.engineInfo.maxWorkGroupSize && - sdpa_params.conf.head_size * optimal_scale_factor / subgroup_size <= subgroup_size) { - return optimal_scale_factor; - } - } - - return 1; -} - static size_t get_target_seq_len_block_size() { const size_t block_size = 16; return block_size; } -static size_t get_seq_len_partition_size(const sdpa_params& sdpa_params, size_t kernel_type) { - size_t seq_len = 0; - if (kernel_type == KernelsTypes::MULTI_TOKENS) { - seq_len = sdpa_params.conf.head_size * get_sg_number_scale_factor(sdpa_params, kernel_type); - } else { - seq_len = 256; - } - - return seq_len; -} - static Datatype get_softmax_acc_type() { return Datatype::F32; } @@ -71,7 +44,7 @@ static size_t get_partitions_num(const sdpa_params& sdpa_params, size_t kernel_t TransposedDimensionAccessHelperBase dims_k(sdpa_params.inputs[1], sdpa_params.input1_order); auto source_seq_len = dims_k.y_dim().v; - return CeilDiv(source_seq_len, get_seq_len_partition_size(sdpa_params, kernel_type)); + return CeilDiv(source_seq_len, SDPAKernelOpt::get_seq_len_partition_size(sdpa_params, sdpa_params.conf.head_size, kernel_type)); } static std::vector get_internal_buffer_sizes(const sdpa_params& sdpa_params, size_t kernel_type) { @@ -130,6 +103,33 @@ static std::string GetKernelName(std::string base_name, KernelsTypes type, const return kernel_name; } +size_t SDPAKernelOpt::get_sg_number_scale_factor(const Params& params, size_t head_size, size_t kernel_type) { + const size_t optimal_scale_factor = 2; + if (kernel_type == KernelsTypes::MULTI_TOKENS) { + if (head_size * optimal_scale_factor <= params.engineInfo.maxWorkGroupSize) { + return optimal_scale_factor; + } + } else if (kernel_type == KernelsTypes::SINGLE_TOKEN) { + if (head_size * optimal_scale_factor <= params.engineInfo.maxWorkGroupSize && + head_size * optimal_scale_factor / subgroup_size <= subgroup_size) { + return optimal_scale_factor; + } + } + + return 1; +} + +size_t SDPAKernelOpt::get_seq_len_partition_size(const Params& params, size_t head_size, 
size_t kernel_type) { + size_t seq_len = 0; + if (kernel_type == KernelsTypes::MULTI_TOKENS) { + seq_len = head_size * get_sg_number_scale_factor(params, head_size, kernel_type); + } else { + seq_len = 256; + } + + return seq_len; +} + ParamsKey SDPAKernelOpt::GetSupportedKey() const { ParamsKey k; k.EnableInputDataType(Datatype::INT8); @@ -176,14 +176,14 @@ JitConstants SDPAKernelOpt::GetJitConstants(const sdpa_params& params, size_t ke const auto& config = params.conf; jit.AddConstant(MakeJitConstant("SUBGROUP_SIZE", subgroup_size)); jit.AddConstant(MakeJitConstant("HEAD_SIZE", config.head_size)); - jit.AddConstant(MakeJitConstant("SEQ_LEN_PARTITION_SIZE", get_seq_len_partition_size(params, kernel_idx))); + jit.AddConstant(MakeJitConstant("SEQ_LEN_PARTITION_SIZE", get_seq_len_partition_size(params, config.head_size, kernel_idx))); auto target_seq_len_block_size = kernel_idx == KernelsTypes::SINGLE_TOKEN ? 1 : get_target_seq_len_block_size(); jit.AddConstant(MakeJitConstant("TARGET_SEQ_LEN_BLOCK_SIZE", target_seq_len_block_size)); auto sdpa_stage = kernel_idx == KernelsTypes::FINALIZATION ? 1 : 0; jit.AddConstant(MakeJitConstant("SDPA_STAGE_" + std::to_string(sdpa_stage), 1)); - jit.AddConstant(MakeJitConstant("SG_SCALE_FACTOR", get_sg_number_scale_factor(params, kernel_idx))); + jit.AddConstant(MakeJitConstant("SG_SCALE_FACTOR", get_sg_number_scale_factor(params, config.head_size, kernel_idx))); if (params.conf.is_paged_attention) { if (params.conf.has_alibi_input) { @@ -196,6 +196,10 @@ JitConstants SDPAKernelOpt::GetJitConstants(const sdpa_params& params, size_t ke } else { jit.AddConstant(MakeJitConstant("HAS_SCALE_INPUT", 1)); } + + if (params.outputs.size() > 1) { + jit.AddConstant(MakeJitConstant("PAGED_ATTENTION_SCORES_OUTPUT", 1)); + } } else if (params.inputs.size() <= 4) { jit.AddConstant(MakeJitConstant("STATIC_SCALE_VALUE_INV", std::sqrt(static_cast<float>(params.conf.head_size)))); jit.AddConstant(MakeJitConstant("STATIC_SCALE_VALUE", 1.0f / std::sqrt(static_cast<float>(params.conf.head_size)))); @@ -218,11 +222,11 @@ CommonDispatchData SDPAKernelOpt::SetDefault(const sdpa_params& params, size_t k if (params.conf.is_paged_attention) { OPENVINO_ASSERT(kernel_idx == KernelsTypes::MULTI_TOKENS); - const size_t sg_num_scale = get_sg_number_scale_factor(params, kernel_idx); const size_t heads_num = static_cast<size_t>(params.conf.heads_num); + const size_t head_size = static_cast<size_t>(params.conf.head_size); + const size_t sg_num_scale = get_sg_number_scale_factor(params, head_size, kernel_idx); const size_t target_seq_len_block_size = get_target_seq_len_block_size(); const size_t target_seq_len = static_cast<size_t>(params.conf.paged_attention_aligned_seq_len); - const size_t head_size = static_cast<size_t>(params.conf.head_size); dispatch_data.gws = { heads_num, CeilDiv(target_seq_len, target_seq_len_block_size), @@ -243,13 +247,13 @@ CommonDispatchData SDPAKernelOpt::SetDefault(const sdpa_params& params, size_t k const size_t target_seq_len_block_size = kernel_idx == 1 ?
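
(Aside: the two helpers promoted to SDPAKernelOpt statics above interact as follows; a sketch assuming an invented device work-group limit:)

#include <cstddef>

static std::size_t sg_scale(std::size_t head_size, std::size_t max_wg) {
    const std::size_t optimal = 2;
    return (head_size * optimal <= max_wg) ? optimal : 1;  // MULTI_TOKENS branch of the helper
}

int main() {
    const std::size_t head_size = 128, max_wg = 512;  // invented engineInfo.maxWorkGroupSize
    const std::size_t partition = head_size * sg_scale(head_size, max_wg);  // 256 for MULTI_TOKENS
    return partition == 256 ? 0 : 1;  // other kernel types use a fixed 256 instead
}
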
get_target_seq_len_block_size() : 1; if (kernel_idx == KernelsTypes::SINGLE_TOKEN) { - const size_t sg_num_scale = get_sg_number_scale_factor(params, kernel_idx); + const size_t sg_num_scale = get_sg_number_scale_factor(params, head_size, kernel_idx); dispatch_data.gws = { batch_size * heads_num, CeilDiv(target_seq_len, target_seq_len_block_size), head_size * num_of_partitions * sg_num_scale }; dispatch_data.lws = { 1, 1, head_size * sg_num_scale }; } else if (kernel_idx == KernelsTypes::MULTI_TOKENS) { - const size_t sg_num_scale = get_sg_number_scale_factor(params, kernel_idx); + const size_t sg_num_scale = get_sg_number_scale_factor(params, head_size, kernel_idx); dispatch_data.gws = { batch_size * heads_num, CeilDiv(target_seq_len, target_seq_len_block_size), head_size * sg_num_scale }; @@ -317,7 +321,7 @@ KernelsData SDPAKernelOpt::GetKernelsData(const Params& params) const { false, inputs_num, GetFusedPrimitiveInputsCount(params), - static_cast(prim_params.outputs.size()), + 1 /* number_of_outputs */, prim_params.is_shape_agnostic); auto beam_table_idx = prim_params.inputs.size(); @@ -339,6 +343,19 @@ KernelsData SDPAKernelOpt::GetKernelsData(const Params& params) const { kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); + if (prim_params.conf.is_paged_attention && prim_params.outputs.size() > 1) { + // Intermediate buffers for PagedAttention scores calculation: + // softmax_results, subsequence_offsets, exp_sums, max_logits, tmp_out + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 3}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 4}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 5}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 6}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 7}); + + // Scalar used for proper offset calculation of intermediate data buffers + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0}); + } + const auto buf_sizes = get_internal_buffer_sizes(prim_params, kernel_idx); if (!prim_params.conf.is_paged_attention) { kd.internalBufferSizes.clear(); @@ -379,6 +396,15 @@ void SDPAKernelOpt::GetUpdateDispatchDataFunc(KernelData& kd) const { kernel_data.kernels[0].params.workGroups.global = dispatch_data.gws; kernel_data.kernels[0].params.workGroups.local = dispatch_data.lws; kernel_data.kernels[0].skip_execution = false; + + if (prim_params.outputs.size() > 1) { + const auto max_seq_len = prim_params.conf.paged_attention_max_len; + const auto seq_len_partition_size = get_seq_len_partition_size(params, prim_params.conf.head_size, KernelsTypes::MULTI_TOKENS); + + kernel_data.kernels[0].params.scalars.resize(1); + kernel_data.kernels[0].params.scalars[0].t = ScalarDescriptor::Types::UINT32; + kernel_data.kernels[0].params.scalars[0].v.u32 = static_cast(Align(max_seq_len, seq_len_partition_size)); + } } else { const auto num_of_partitions = get_partitions_num(prim_params, KernelsTypes::SINGLE_TOKEN); const auto buf_sizes = get_internal_buffer_sizes(prim_params, KernelsTypes::SINGLE_TOKEN); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.h index 8d7279f5546112..a4d351498d7075 100644 --- 
a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.h @@ -17,6 +17,9 @@ class SDPAKernelOpt : public SDPAKernelBase { KernelsPriority GetKernelsPriority(const Params& params) const override; ParamsKey GetSupportedKey() const override; + static size_t get_sg_number_scale_factor(const Params& params, size_t head_size, size_t kernel_type); + static size_t get_seq_len_partition_size(const Params& params, size_t head_size, size_t kernel_type); + protected: bool Validate(const Params& p) const override; void GetUpdateDispatchDataFunc(KernelData& kd) const override; diff --git a/src/plugins/intel_gpu/src/kernel_selector/primitive_db.cpp b/src/plugins/intel_gpu/src/kernel_selector/primitive_db.cpp index cd8128baff37c9..e9fa5dd675629a 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/primitive_db.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/primitive_db.cpp @@ -21,16 +21,28 @@ namespace cache { primitive_db::primitive_db() : primitives({ #include "ks_primitive_db.inc" + }), + cm_primitives({ +#include "ks_cm_primitive_db.inc" }), batch_headers({ #include "ks_primitive_db_batch_headers.inc" + }), + cm_batch_headers({ +#include "ks_cm_primitive_db_batch_headers.inc" }) { } -std::vector<code> primitive_db::get(const primitive_id& id) const { +std::vector<code> primitive_db::get(const primitive_id& id, bool is_cm) const { #ifndef NDEBUG { - std::ifstream kernel_file{id + ".cl", std::ios::in | std::ios::binary}; + std::string filename = id; + if (!is_cm) { + filename += ".cl"; + } else { + filename += ".cpp"; + } + std::ifstream kernel_file{filename, std::ios::in | std::ios::binary}; if (kernel_file.is_open()) { code ret; auto beg = kernel_file.tellg(); @@ -46,7 +58,11 @@ std::vector<code> primitive_db::get(const primitive_id& id) const { } #endif try { - const auto codes = primitives.equal_range(id); + auto* primitives_ptr = &primitives; + if (is_cm) { + primitives_ptr = &cm_primitives; + } + const auto codes = primitives_ptr->equal_range(id); std::vector<code> temp; std::for_each(codes.first, codes.second, [&](const std::pair<const primitive_id, code>& c) { temp.push_back(c.second); diff --git a/src/plugins/intel_gpu/src/kernel_selector/primitive_db.h b/src/plugins/intel_gpu/src/kernel_selector/primitive_db.h index e384f6c9879fb5..5c6987246ce1f4 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/primitive_db.h +++ b/src/plugins/intel_gpu/src/kernel_selector/primitive_db.h @@ -21,8 +21,9 @@ using primitive_id = std::string; struct primitive_db { primitive_db(); - std::vector<code> get(const primitive_id& id) const; + std::vector<code> get(const primitive_id& id, bool is_cm = false) const; std::map<std::string, std::string> get_batch_headers() const { return std::move(batch_headers); } + std::map<std::string, std::string> get_cm_batch_headers() const { return std::move(cm_batch_headers); } private: struct case_insensitive_compare { @@ -35,7 +36,9 @@ struct primitive_db { } }; std::multimap<primitive_id, code, case_insensitive_compare> primitives; + std::multimap<primitive_id, code, case_insensitive_compare> cm_primitives; std::map<std::string, std::string> batch_headers; + std::map<std::string, std::string> cm_batch_headers; }; } // namespace cache diff --git a/src/plugins/intel_gpu/src/kernel_selector/primitive_db_gen.py b/src/plugins/intel_gpu/src/kernel_selector/primitive_db_gen.py index 116844f3bccfc7..393e67f3bdb6aa 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/primitive_db_gen.py +++ b/src/plugins/intel_gpu/src/kernel_selector/primitive_db_gen.py @@ -6,21 +6,30 @@ # the trailing characters are a tag to allow multiple primitive implementations from __future__ import print_function +from enum import Enum import os import argparse
import glob import ntpath import re -class OpenCL2CHeaders(object): +class KernelLang(Enum): + OCLC = 0 + CM = 1 + def header_extension(self): + return (".cl", ".h")[self.value] + def source_extension(self): + return (".cl", ".cpp")[self.value] +class Kernels2CHeaders(object): - def __init__(self, kernels_folder, out_path, out_file_name_prim_db, out_file_name_batch_headers): + def __init__(self, kernels_folder, out_path, out_file_name_prim_db, out_file_name_batch_headers, kernel_lang): self.kernels_folder = os.path.abspath(kernels_folder) self.out_path = os.path.abspath(out_path) self.out_file_name_prim_db = out_file_name_prim_db self.out_file_name_batch_headers = out_file_name_batch_headers self.include_files = {} self.batch_headers = [] + self.kernel_lang = kernel_lang self.find_and_set_batch_headers() # NOTE: batch_headers are headers with macros on which the runtime jitter might depend on. @@ -29,7 +38,7 @@ def __init__(self, kernels_folder, out_path, out_file_name_prim_db, out_file_nam # specially for improving the jit compilation performance, i.e., # they are not to be included in each kernel, but to be included only once at the beginning of each batch. def find_and_set_batch_headers(self): - batch_headers_list = [ntpath.basename(h) for h in glob.glob(os.path.join(self.kernels_folder, "include/batch_headers/*.cl"))] + batch_headers_list = [ntpath.basename(h) for h in glob.glob(os.path.join(self.kernels_folder, "include/batch_headers/*" + self.kernel_lang.header_extension()))] deps = {} for h in batch_headers_list: header_file = os.path.abspath(os.path.join(self.kernels_folder, "include/batch_headers", h)) @@ -56,11 +65,11 @@ def topological_sort(self, cur_key, items, stack, res): def convert(self): res = '// This file is autogenerated by primitive_db_gen.py, all changes to this file will be undone\n\n' - filelist = glob.glob(os.path.join(self.kernels_folder, "*.cl")) + filelist = glob.glob(os.path.join(self.kernels_folder, "*" + self.kernel_lang.source_extension())) for filename in filelist: #try: print('processing {}'.format(filename)) - res += self.cl_file_to_str(filename) + res += self.kernel_file_to_str(filename) #except: # pass out_file_name_prim_db = os.path.join(self.out_path, self.out_file_name_prim_db) @@ -198,8 +207,8 @@ def batch_headers_to_str(self): characters = 1 # Newline character above res = "" for h in self.batch_headers: - header_name = h[:h.find('.cl')] - res += '{{"{}",\n(std::string) R"(\n'.format(header_name) + header_name = h[:h.rfind('.')] + res += '{{"{}",\n(std::string) R"-(\n'.format(header_name) header_file = os.path.abspath(os.path.join(os.path.dirname(self.kernels_folder + "/include/batch_headers"), "batch_headers/" + h)) content = [] with open(header_file) as f: @@ -208,11 +217,11 @@ def batch_headers_to_str(self): if line.startswith('#include'): continue if (i + 1) % max_lines == 0 or characters + len(line) + 1 > max_characters: - res += ')"\n + (std::string) R"(' + res += ')-"\n + (std::string) R"-(' characters = 0 res += '{}\n'.format(line.rstrip()) characters += len(line) + 1 - res += ')"},\n\n' + res += ')-"},\n\n' return self.post_process_sources(res) def post_process_sources(self, content): @@ -241,10 +250,10 @@ def comment_replacer(match): return content - def cl_file_to_str(self, filename): + def kernel_file_to_str(self, filename): name = ntpath.basename(filename) self.include_files[filename] = {} - kernel_name = name[:name.find('.cl')] + kernel_name = name[:name.rfind('.')] res = '{{"{}",\n(std::string) R"__krnl(\n'.format(kernel_name) 
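
(Aside: the generated .inc entries wrap each kernel source in a C++ raw string literal, which is why batch_headers_to_str above switched from R"( ... )" to the delimited form R"-( ... )-". A tiny C++ sketch of the failure the delimiter avoids:)

#include <iostream>
#include <string>

int main() {
    // std::string bad = R"(printf(")");)";  // a plain raw string ends at the first )" inside
    //                                       // the kernel text, so this would not compile as intended
    std::string ok = R"-(printf(")");)-";    // the '-' delimiter keeps the embedded )" inert
    std::cout << ok << "\n";                 // prints: printf(")");
    return 0;
}
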
content = self.append_file_content(filename, filename) content += self.append_undefs(filename) @@ -265,16 +274,17 @@ def cl_file_to_str(self, filename): return res - def main(): ap = argparse.ArgumentParser() ap.add_argument('-kernels', required=True, metavar='PATH', help='The absolute path to OpenCL kernels folder') ap.add_argument('-out_path', required=True, metavar='PATH', help='The absolute path to dump file') ap.add_argument('-out_file_name_prim_db', required=True, metavar='PATH', help='dump file name') ap.add_argument('-out_file_name_batch_headers', required=True, metavar='PATH', help='dump file name') + ap.add_argument('-cm', required=False, action='store_true', help='Process CM kernel sources instead of ocl c') args = ap.parse_args() - converter = OpenCL2CHeaders(args.kernels, args.out_path, args.out_file_name_prim_db, args.out_file_name_batch_headers) + kernel_lang = KernelLang.CM if args.cm else KernelLang.OCLC + converter = Kernels2CHeaders(args.kernels, args.out_path, args.out_file_name_prim_db, args.out_file_name_batch_headers, kernel_lang) converter.convert() if __name__ == '__main__': diff --git a/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp b/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp new file mode 100644 index 00000000000000..282a483deab189 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/plugin/program_builder.hpp" +#include "intel_gpu/plugin/common_utils.hpp" + +#include "openvino/op/fake_convert.hpp" + +#include "intel_gpu/primitives/fake_convert.hpp" + +namespace ov { +namespace intel_gpu { +static void CreateFakeConvertOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v13::FakeConvert>& op) { + validate_inputs_count(op, {2, 3}); + const auto inputs = p.GetInputInfo(op); + const std::string layerName = layer_type_name_ID(op); + ov::element::Type destination_type = op->get_destination_element_type(); + std::shared_ptr<cldnn::fake_convert> fake_convert_prim = nullptr; + if (inputs.size() == 2) { + fake_convert_prim = std::make_shared<cldnn::fake_convert>(layerName, + inputs[0], + inputs[1], + destination_type); + } else { + fake_convert_prim = std::make_shared<cldnn::fake_convert>(layerName, + inputs[0], + inputs[1], + inputs[2], + destination_type); + } + + p.add_primitive(*op, fake_convert_prim); +} + +REGISTER_FACTORY_IMPL(v13, FakeConvert); + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/paged_attention.cpp b/src/plugins/intel_gpu/src/plugin/ops/paged_attention.cpp index 7425b096b6d324..d82d3a66fed7f7 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/paged_attention.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/paged_attention.cpp @@ -61,10 +61,13 @@ static void CreatePagedAttentionExtensionOp(ProgramBuilder& p, const std::shared OPENVINO_ASSERT(alibi_const != nullptr); prim.has_alibi = ov::shape_size(alibi_const->get_output_shape(0)) > 0; + prim.num_outputs = 1; if (op->get_output_size() > 1) { const auto scores_output_idx = 1; const auto& users = op->get_output_target_inputs(scores_output_idx); - OPENVINO_ASSERT(users.size() == 0, "[GPU] PagedAttention implementation doesn't support scores output yet"); + if (users.size() > 0) { + prim.num_outputs++; // Add scores output + } } p.add_primitive(*op, prim); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/indirect_kv_cache.hpp b/src/plugins/intel_gpu/src/plugin/transformations/indirect_kv_cache.hpp index 0fc96b6215ba95..f76edeeb4f20da 100644 ---
a/src/plugins/intel_gpu/src/plugin/transformations/indirect_kv_cache.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/indirect_kv_cache.hpp @@ -38,7 +38,7 @@ namespace intel_gpu { /// └───────────┘ └───────────────┘ class IndirectKVCache : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("IndirectKVCache", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("IndirectKVCache"); IndirectKVCache(); }; diff --git a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.hpp b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.hpp index 1587021a03ed36..036fdb78914891 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_compression.hpp @@ -32,7 +32,7 @@ namespace intel_gpu { class KVCacheCompression : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("KVCacheCompression", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("KVCacheCompression"); KVCacheCompression(ov::element::Type compression_dt); bool run_on_model(const std::shared_ptr& m) override; diff --git a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.hpp b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.hpp index dbe147da8d46b7..614d3ba5020363 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.hpp @@ -76,7 +76,7 @@ namespace intel_gpu { /// └─────────────┘ └───────────┘ └─────────┘ class KVCacheFusion : public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("KVCacheFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("KVCacheFusion"); KVCacheFusion(); bool run_on_model(const std::shared_ptr& m) override; diff --git a/src/plugins/intel_gpu/src/plugin/transformations/transpose_fusion.hpp b/src/plugins/intel_gpu/src/plugin/transformations/transpose_fusion.hpp index a845c7a7aa86b0..3a985a33c722df 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/transpose_fusion.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/transpose_fusion.hpp @@ -11,7 +11,7 @@ namespace intel_gpu { class TransposeFusion: public ov::pass::GraphRewrite { public: - OPENVINO_RTTI("TransposeFusion", "0"); + OPENVINO_GRAPH_REWRITE_RTTI("TransposeFusion"); TransposeFusion(bool supports_immad = false); }; diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 53ab9aa188b7aa..7c7c09adcd182f 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -92,6 +92,7 @@ #include "transformations/common_optimizations/lstm_cell_fusion.hpp" #include "transformations/common_optimizations/move_eltwise_up_data_movement.hpp" #include "transformations/common_optimizations/mvn_fusion.hpp" +#include "transformations/common_optimizations/sdpa_scale_fusion.hpp" #include "transformations/common_optimizations/softmax_fusion.hpp" #include "transformations/common_optimizations/glu_fusion.hpp" #include "transformations/common_optimizations/transpose_sinking.hpp" @@ -941,6 +942,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { if (!disable_horizontal_fc_fusion) manager.register_pass(); + manager.register_pass(); manager.register_pass(); auto pass_config = manager.get_pass_config(); manager.register_pass(); diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp index 
a2b7e62ea0cae2..5c6c6dc83aeaea 100644 --- a/src/plugins/intel_gpu/src/runtime/layout.cpp +++ b/src/plugins/intel_gpu/src/runtime/layout.cpp @@ -446,8 +446,6 @@ bool layout::compatible(const layout& other) const { if (l1.is_dynamic() || l2.is_dynamic()) return false; - auto l1_size = l1.get_tensor(); - auto l2_size = l2.get_tensor(); if (l1 == l2) return true; if (check_redundant_1d_along_feature(l1, l2)) @@ -459,7 +457,7 @@ bool layout::compatible(const layout& other) const { if (format::is_default_format(l1.format) && format::is_default_format(l2.format) && !l1.data_padding && !l2.data_padding && l1.get_linear_size() == l2.get_linear_size()) return true; - if (l1_size != l2_size) + if (l1.get_shape() != l2.get_shape()) return false; if (l1.get_linear_size() != l2.get_linear_size()) return false; @@ -505,6 +503,19 @@ bool layout::compatible(const layout& other) const { auto l1_pitch = l1.get_pitches(); auto l2_pitch = l2.get_pitches(); + auto l1_padded_dims = l1.get_padded_dims(); + auto l2_padded_dims = l2.get_padded_dims(); + + // Ignore pitches which will never be used (for padded dims with size == 1) + for (size_t i = 0; i < l1_padded_dims.size(); ++i) { + if (l1_padded_dims[i] == 1) { + l1_pitch[i] = 0; + } + if (l2_padded_dims[i] == 1) { + l2_pitch[i] = 0; + } + } + auto l1_offset = l1.get_linear_offset(); auto l2_offset = l2.get_linear_offset(); if (l1_pitch == l2_pitch && l1_offset == l2_offset) diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp new file mode 100644 index 00000000000000..d1236f5c524421 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp @@ -0,0 +1,141 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/ov_tensor_utils.hpp" +#include "common_test_utils/file_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +#include "openvino/op/parameter.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/fake_convert.hpp" + +namespace { + +namespace fp8 { +constexpr float MAX_F8E4M3 = 448.f; +constexpr float MAX_F8E5M2 = 57344.f; +} // namespace fp8 + +using namespace std; +using namespace ov; +using namespace testing; +using ov::test::InputShape; + +using FakeConvertTestParams = std::tuple< + ov::Shape, // Input shapes + ov::Shape, // Scale shape + ov::Shape, // Shift shape + ov::element::Type, // input precision + ov::element::Type, // destination type + std::string >; // device name + +class FakeConvertTest : public testing::WithParamInterface<FakeConvertTestParams>, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(testing::TestParamInfo<FakeConvertTestParams> obj) { + ov::Shape input_shape; + ov::Shape scale_shape; + ov::Shape shift_shape; + ov::element::Type prec; + ov::element::Type destination_type; + std::string target_device; + + std::tie(input_shape, scale_shape, shift_shape, prec, destination_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=("; + result << ov::test::utils::vec2str(input_shape) << "_"; + result << "scale_shape=" << ov::test::utils::vec2str(scale_shape) << "_"; + result << "shift_shape=" << ov::test::utils::vec2str(shift_shape) << "_"; + result << "input_precision=" << prec << "_"; + result << "destination_type=" << destination_type << "_"; + result << "device_type=" << target_device; + return result.str(); + } + +protected: + ov::Shape
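
(Aside: the padded-dims rule added to layout::compatible above can be shown in isolation — a dimension of extent 1 is never stepped through, so its pitch is irrelevant and zeroing both sides avoids false mismatches; numbers invented:)

#include <array>

int main() {
    std::array<int, 4> dims{2, 1, 2, 4};
    std::array<int, 4> l1_pitch{8, 32, 4, 1};  // pitch of the size-1 dim differs...
    std::array<int, 4> l2_pitch{8, 99, 4, 1};  // ...but is never actually used for addressing
    for (int i = 0; i < 4; ++i)
        if (dims[i] == 1) { l1_pitch[i] = 0; l2_pitch[i] = 0; }
    return l1_pitch == l2_pitch ? 0 : 1;  // the layouts now compare as compatible
}
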
input_shape, scale_shape, shift_shape; + ov::element::Type destination_type; + + void SetUp() override { + ov::element::Type prec; + std::tie(input_shape, scale_shape, shift_shape, prec, destination_type, targetDevice) = GetParam(); + const float MAX_FP8 = (destination_type == ov::element::f8e4m3) ? fp8::MAX_F8E4M3 : fp8::MAX_F8E5M2; + if (shift_shape.empty()) { + auto data = make_shared<op::v0::Parameter>(prec, input_shape); + auto scale = op::v0::Constant::create(prec, + scale_shape, + {MAX_FP8 / (MAX_FP8 / 2.f), + 1.0f, + MAX_FP8 / (MAX_FP8 * 3.5f), + MAX_FP8 / (MAX_FP8 * 4.f)}); + + auto op = make_shared<op::v13::FakeConvert>(data, scale, destination_type); + + function = make_shared<ov::Model>(OutputVector{op}, ParameterVector{data}); + } else { + auto data = make_shared<op::v0::Parameter>(prec, input_shape); + auto scale = op::v0::Constant::create(prec, + scale_shape, + {MAX_FP8 / (MAX_FP8 / 2.f), + 1.0f, + MAX_FP8 / (MAX_FP8 * 3.5f), + MAX_FP8 / (MAX_FP8 * 4.f)}); + auto shift = op::v0::Constant::create(prec, shift_shape, {0.f, 0.f, 0.f, 0.f}); + + auto op = make_shared<op::v13::FakeConvert>(data, scale, shift, destination_type); + + function = make_shared<ov::Model>(OutputVector{op}, ParameterVector{data}); + } + } + + void generate_inputs(const std::vector<ov::Shape>& target_shapes) override { + inputs.clear(); + const float MAX_FP8 = (destination_type == ov::element::f8e4m3) ? fp8::MAX_F8E4M3 : fp8::MAX_F8E5M2; + const auto& func_inputs = function->inputs(); + auto& data_input = func_inputs[0]; + ov::Tensor tensor = ov::Tensor(data_input.get_element_type(), target_shapes[0]); + std::vector<float> input_data{MAX_FP8 / 4.f, + MAX_FP8 / 3.f, + MAX_FP8 / 2.f, + MAX_FP8, + MAX_FP8, + MAX_FP8, + MAX_FP8 * 1.2f, + MAX_FP8 * 2.3f, + MAX_FP8 * 3.4f, + MAX_FP8 * 2.f, + MAX_FP8 * 3.f, + MAX_FP8 * 4.f}; + auto* data_ptr = tensor.data<float>(); + for (size_t i = 0; i < input_data.size(); i++) { + data_ptr[i] = input_data[i]; + } + inputs.insert({data_input.get_node_shared_ptr(), tensor}); + } +}; + +TEST_P(FakeConvertTest, Inference) { + run(); +} + +const std::vector<ov::element::Type> input_precisions = {ov::element::f32}; + +const std::vector<ov::Shape> input_shapes = {{4, 3}}; + +const ov::Shape scale_shape = {4, 1}; +const std::vector<ov::Shape> shift_shapes = {{4, 1}, {}}; +const std::vector<ov::element::Type> destination_types = {ov::element::f8e4m3, ov::element::f8e5m2}; + +INSTANTIATE_TEST_SUITE_P(Smoke_FakeConvertTest, + FakeConvertTest, + ::testing::Combine(::testing::ValuesIn(input_shapes), + ::testing::Values(scale_shape), + ::testing::ValuesIn(shift_shapes), + ::testing::ValuesIn(input_precisions), + ::testing::ValuesIn(destination_types), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + FakeConvertTest::getTestCaseName); +} // namespace diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp index a16cd20846a1c7..5dfc450e43905a 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp @@ -85,6 +85,7 @@ #include "intel_gpu/primitives/swiglu.hpp" #include "intel_gpu/primitives/tile.hpp" #include "intel_gpu/primitives/unique.hpp" +#include "intel_gpu/primitives/fake_convert.hpp" #include "primitive_inst.h" #include "test_utils.h" @@ -226,5 +227,6 @@ TEST(registry_test, no_null_impls) { cldnn::unique_count, cldnn::unique_gather, cldnn::scaled_dot_product_attention, - cldnn::rope>(); + cldnn::rope, + cldnn::fake_convert>(); } diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp index
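
(Aside: as I read the test above, each row's scale constant is chosen so that row's largest input lands at or below the f8 maximum after multiplication. A sketch reproducing the arithmetic with the f8e4m3 limit, values copied from the test:)

#include <cstdio>

int main() {
    const float MAX_FP8 = 448.f;  // fp8::MAX_F8E4M3
    const float scales[4] = {MAX_FP8 / (MAX_FP8 / 2.f),   // 2.0   (row max = MAX/2)
                             1.0f,                        // 1.0   (row max = MAX)
                             MAX_FP8 / (MAX_FP8 * 3.5f),  // ~0.286 (row max = 3.4*MAX)
                             MAX_FP8 / (MAX_FP8 * 4.f)};  // 0.25  (row max = 4*MAX)
    std::printf("%g %g %g %g\n", scales[0], scales[1], scales[2], scales[3]);
    return 0;
}
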
7c666819176a13..279a86c73f55bf 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp @@ -261,6 +261,10 @@ INSTANTIATE_TEST_SUITE_P(smoke, layout_cmp_test, layout{ov::PartialShape{4, 2, 3, 4, 5}, data_types::f16, format::is_os_zyx_isv16_osv16}, false, false}, {layout{ov::PartialShape{4, 2, 3, 4, 5}, data_types::f16, format::goiyx}, layout{ov::PartialShape{4, 2, 3, 4, 5}, data_types::f16, format::gioyx}, false, false}, + {layout{ov::PartialShape{4, 1, 16, 16}, data_types::f16, format::bfyx}, + layout{ov::PartialShape{4, 1, 16, 16}, data_types::f16, format::byxf}, false, true}, + {layout{ov::PartialShape{2, 1, 2, 4}, data_types::f16, format::bfyx, padding({0, 0, 1, 0}, {0, 0, 1, 0})}, + layout{ov::PartialShape{2, 1, 2, 4}, data_types::f16, format::bfyx, padding({0, 1, 0, 0}, {0, 0, 0, 0})}, false, false}, })); struct layouts_transform_test_params { diff --git a/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp index 9a4cb71450a53c..0eb425b4dc1119 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp @@ -192,9 +192,9 @@ TEST(add_required_reorders, skip_adding_reorder_batch_axis_padding) { crop_prim = network.get_primitive("crop2"); ASSERT_EQ(crop_prim->can_be_optimized(), true); auto reorder_prim = network.get_primitive("crop1_reorder"); - ASSERT_EQ(reorder_prim->can_be_optimized(), true); + ASSERT_EQ(reorder_prim->can_be_optimized(), false); reorder_prim = network.get_primitive("crop2_reorder"); - ASSERT_EQ(reorder_prim->can_be_optimized(), true); + ASSERT_EQ(reorder_prim->can_be_optimized(), false); auto concate = network.get_primitive("concat"); ASSERT_EQ(concate->can_be_optimized(), false); } diff --git a/src/plugins/intel_gpu/tests/unit/passes/mark_shape_of_subgraphs_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/mark_shape_of_subgraphs_test.cpp index 493ab79bf8e2cb..ee4382e51645cd 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/mark_shape_of_subgraphs_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/mark_shape_of_subgraphs_test.cpp @@ -318,3 +318,108 @@ TEST(mark_shape_of_subgraphs, gather_compressed_no_mark) { ASSERT_FALSE(check_subgraph(prog->get_node("shape_of"), prog->get_node("gather_compressed"))); ASSERT_FALSE(check_subgraph(prog->get_node("shape_of"), prog->get_node("concat"))); } + +TEST(mark_shape_of_subgraphs, broadcast_not_existed_after_shapeof) { + auto& engine = get_test_engine(); + auto input_layout_dynamic = layout{ov::PartialShape{ov::Dimension::dynamic(), 4, ov::Dimension::dynamic(), ov::Dimension::dynamic()}, + data_types::f32, format::bfyx}; + auto data_0 = engine.allocate_memory({ ov::PartialShape{4}, data_types::i32, format::bfyx }); + set_values(data_0, {1, 4, 1, 1}); + auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, {1152, 4, 1, 1} }); + + topology topology; + topology.add(input_layout("input", input_layout_dynamic)); + topology.add(data("data_0", data_0)); + topology.add(data("weights", weights)); + topology.add(shape_of("shape_of", input_info("input"), data_types::i32)); + topology.add(reshape("reshape", input_info("shape_of"), input_info("data_0"), false, {})); + topology.add(convolution("convolution", input_info("reshape"), "weights", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + + ExecutionConfig config = 
get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); + + auto prog = network.get_program(); + ASSERT_NE(prog, nullptr); + + ASSERT_TRUE(check_subgraph(prog->get_node("shape_of"), prog->get_node("convolution"))); +} + +TEST(mark_shape_of_subgraphs, broadcast_w_data_and_direct_shapeof_no_mark) { + auto& engine = get_test_engine(); + auto input_layout_dynamic = layout{ov::PartialShape{ov::Dimension::dynamic(), 4, ov::Dimension::dynamic(), ov::Dimension::dynamic()}, + data_types::f32, format::bfyx}; + auto data_0 = engine.allocate_memory({ ov::PartialShape{1}, data_types::i32, format::bfyx }); + set_values(data_0, {0}); + auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, {1152, 4, 2, 2} }); + + topology topology; + topology.add(input_layout("input", input_layout_dynamic)); + topology.add(data("data_0", data_0)); + topology.add(shape_of("shape_of", input_info("input"), data_types::i32)); + topology.add(broadcast("broadcast", input_info("data_0"), input_info("shape_of"), {}, ov::op::BroadcastType::BIDIRECTIONAL)); + topology.add(data("weights", weights)); + topology.add(convolution("convolution", input_info("broadcast"), "weights", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); + + auto prog = network.get_program(); + ASSERT_NE(prog, nullptr); + + ASSERT_FALSE(check_subgraph(prog->get_node("shape_of"), prog->get_node("convolution"))); + ASSERT_FALSE(check_subgraph(prog->get_node("shape_of"), prog->get_node("broadcast"))); +} + +TEST(mark_shape_of_subgraphs, broadcast_w_data_and_indirect_shapeof) { + auto& engine = get_test_engine(); + auto input_layout_dynamic = layout{ov::PartialShape{ov::Dimension::dynamic(), 4, ov::Dimension::dynamic(), ov::Dimension::dynamic()}, + data_types::f32, format::bfyx}; + auto data_0 = engine.allocate_memory({ ov::PartialShape{1}, data_types::i32, format::bfyx }); + set_values(data_0, {0}); + + topology topology; + topology.add(input_layout("input", input_layout_dynamic)); + topology.add(data("data_0", data_0)); + topology.add(shape_of("shape_of", input_info("input"), data_types::i32)); + topology.add(gather("gather", input_info("shape_of"), input_info("data_0"), 0, 0, {})); + topology.add(broadcast("broadcast", input_info("data_0"), input_info("gather"), {}, ov::op::BroadcastType::BIDIRECTIONAL)); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); + + auto prog = network.get_program(); + ASSERT_NE(prog, nullptr); + + ASSERT_TRUE(check_subgraph(prog->get_node("shape_of"), prog->get_node("broadcast"))); +} + +TEST(mark_shape_of_subgraphs, broadcast_w_direct_shapeof_and_data) { + auto& engine = get_test_engine(); + auto input_layout_dynamic = layout{ov::PartialShape{ov::Dimension::dynamic(), 4, ov::Dimension::dynamic(), ov::Dimension::dynamic()}, + data_types::f32, format::bfyx}; + auto target_shape = engine.allocate_memory({ ov::PartialShape{4}, data_types::i32, format::bfyx }); + set_values(target_shape, {4, 4, 1, 1}); + + topology topology; + topology.add(input_layout("input", 
input_layout_dynamic)); + topology.add(data("target_shape", target_shape)); + topology.add(shape_of("shape_of", input_info("input"), data_types::i32)); + topology.add(broadcast("broadcast", input_info("shape_of"), input_info("target_shape"), {}, ov::op::BroadcastType::BIDIRECTIONAL)); + topology.add(reshape("reshape", input_info("input"), input_info("broadcast"), false, ov::PartialShape{4, 4, 1, 1})); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); + + auto prog = network.get_program(); + ASSERT_NE(prog, nullptr); + + ASSERT_TRUE(check_subgraph(prog->get_node("shape_of"), prog->get_node("broadcast"))); +} diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp index 456fab4ae0286a..1eb11c662608e0 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp @@ -1224,7 +1224,7 @@ TEST(prepare_buffer_fusing, test_implicit_crop_and_outerpadding) { auto reorder_prim = network.get_primitive("gather1_reorder"); ASSERT_EQ(reorder_prim->can_be_optimized(), true); reorder_prim = network.get_primitive("gather2_reorder"); - ASSERT_EQ(reorder_prim->can_be_optimized(), true); + ASSERT_EQ(reorder_prim->can_be_optimized(), false); auto reshape_prim = network.get_primitive("reshape1"); ASSERT_EQ(reshape_prim->can_be_optimized(), true); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index f0243f055c3670..13934020bfdf66 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -10820,7 +10820,14 @@ TEST_P(conv_dyn_test, convolution_gpu_fsv16_1x1_no_bias) { return outputs_ref.at("conv").get_memory(); }; - auto in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(p.in_shape[1]), ov::Dimension(), ov::Dimension()}, data_types::f16, format::b_fs_yx_fsv16}; + cldnn::layout in_layout; + if (p.in_shape[2] % 2 == 0) { + // input feature is static + in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(p.in_shape[1]), ov::Dimension(), ov::Dimension()}, data_types::f16, format::b_fs_yx_fsv16}; + } else { + // input feature is dynamic + in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(), ov::Dimension(), ov::Dimension()}, data_types::f16, format::b_fs_yx_fsv16}; + } auto input = engine.allocate_memory({ p.in_shape, data_types::f16, format::b_fs_yx_fsv16 }); auto weights = engine.allocate_memory({p.wei_shape, data_types::f16, is_grouped ? 
format::bfzyx : format::bfyx}); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index f59dc5c42cffc1..5bc7e403d3bf74 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -4137,6 +4137,10 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input this->test_compressed_int4_scale_dyn_quan(false, true, 511, true); } +TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_batch_1) { + this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 1, 2048, 3072); +} + TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_edge_case) { this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 359, 1536, 2560); } @@ -4827,3 +4831,52 @@ TEST_F(fully_connected_gpu_tests, weights_reorder_shapes_update) { TEST_F(fully_connected_gpu_tests, weights_reorder_shapes_update_cached) { this->test_weights_reorder_shapes_update(true); } + +TEST(fully_connected_gpu, cm) { + int min_random = -2, max_random = 2; + auto& engine = get_test_engine(); + ExecutionConfig config = get_test_default_config(engine); + + if (!cldnn::check_cm_jit_support(engine, config)) { + GTEST_SKIP(); + } + + // Test parameters + const int batch_num = 2; + const int output_f = 4; + const int input_x = 1; + const int input_y = 1; + const int input_f = 3; + + // Allocate memory + auto input_prim = engine.allocate_memory({ data_types::f16, format::bfyx, { batch_num, input_f, input_y, input_x } }); + auto weights_prim = engine.allocate_memory({ data_types::f16, format::oiyx, { output_f, input_f, input_y, input_x } }); + auto bias_prim = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, output_f, 1 } }); + + // Generate random input data and set values + tests::random_generator rg(GET_SUITE_NAME); + auto input_data = rg.generate_random_4d(batch_num, input_f, input_y, input_x, min_random, max_random); + auto weights_data = rg.generate_random_4d(output_f, input_f, input_y, input_x, min_random, max_random); + auto bias_data = rg.generate_random_1d(output_f, min_random, max_random); + + auto input_data_bfyx = flatten_4d(format::bfyx, input_data); + auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); + set_values(input_prim, input_data_bfyx); + set_values(weights_prim, weights_data_bfyx); + set_values(bias_prim, bias_data); + topology topology( + input_layout("input", input_prim->get_layout()), + data("weights", weights_prim), + data("bias", bias_prim), + fully_connected("fc_prim", input_info("input"), "weights", "bias") + ); + ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "", impl_types::cm }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); + network network(engine, topology, config); + network.set_input_data("input", input_prim); + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "fc_prim"); + + // Do not validate output for CM +} diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/paged_attention_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/paged_attention_gpu_test.cpp new file mode 100644 index 00000000000000..a32ef3325cd9bc --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/test_cases/paged_attention_gpu_test.cpp @@ -0,0 +1,687 @@ +// Copyright (C) 2024 Intel 
Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" +#include "random_generator.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace cldnn; +using namespace ov::intel_gpu; +using namespace ::tests; + +/* +* PagedAttention inputs: +* [0]: query +* shape: [batch_size_in_tokens, num_heads * head_size], type: f16 +* [1]: key +* shape: [batch_size_in_tokens, num_kv_heads * head_size], type: f16 +* [2]: value +* shape: [batch_size_in_tokens, num_kv_heads * head_size], type: f16 +* [3]: key_cache +* shape: [num_blocks, num_kv_heads, head_size, block_size], type: f16 +* [4]: value_cache +* shape: [num_blocks, num_kv_heads, block_size, head_size], type: f16 +* [5]: past_lens +* shape: [batch_size_in_sequences], type: i32 +* [6]: subsequence_begins +* shape: [batch_size_in_sequences + 1], type: i32 +* [7]: block_indices +* Shape: [num_blocks], type: i32 +* [8]: block_indices_begins +* Shape: [batch_size_in_sequences + 1], type: i32 +* [9]: scale, optional +* [10]: sliding_window, optional +* [11]: alibi_slopes, optional +* [12]: max_context_len +* shape: [], type: i32 +*/ + +struct SubsequenceDescriptor { + int num_tokens; + int past_len; +}; + +struct PagedAttentionManager { + int num_heads; + int head_size; + int block_size; + std::vector<SubsequenceDescriptor> subsequence_descs; + + // per-subsequence QKV inputs + std::vector<std::vector<ov::float16>> query_data; // {[1, num_tokens, num_heads, head_size], ..} + std::vector<std::vector<ov::float16>> key_data; // {[1, past_len + num_tokens, num_heads, head_size], ..} + std::vector<std::vector<ov::float16>> value_data; // {[1, past_len + num_tokens, num_heads, head_size], ..} + + // common PA inputs + std::vector<int> past_lens; + std::vector<int> subsequence_begins; + std::vector<int> block_indices; + std::vector<int> block_indices_begins; + std::vector<int> max_context_len; + + cldnn::engine& test_engine; + cldnn::stream& test_stream; + tests::random_generator& rg; + + PagedAttentionManager(tests::random_generator& rg, + cldnn::engine& engine, + cldnn::stream& stream, + const std::vector<SubsequenceDescriptor>& subsequence_descs, + int num_heads, + int head_size, + int block_size) + : num_heads(num_heads) + , head_size(head_size) + , block_size(block_size) + , subsequence_descs(subsequence_descs) + , test_engine(engine) + , test_stream(stream) + , rg(rg) { + // init subsequence_begins and block_indices_begins + subsequence_begins.push_back(0); + block_indices_begins.push_back(0); + + int max_len = 0; + for (int i = 0; i < static_cast<int>(subsequence_descs.size()); i++) { + const auto& subsequence_desc = subsequence_descs[i]; + max_len = std::max(max_len, subsequence_desc.num_tokens + subsequence_desc.past_len); + + query_data.push_back(generate_input_data(rg, num_heads, subsequence_desc.num_tokens, head_size)); + key_data.push_back(generate_input_data(rg, num_heads, subsequence_desc.num_tokens + subsequence_desc.past_len, head_size)); + value_data.push_back(generate_input_data(rg, num_heads, subsequence_desc.num_tokens + subsequence_desc.past_len, head_size)); + + past_lens.push_back(subsequence_desc.past_len); + int subsequence_start_pos = subsequence_begins[i]; + int subsequence_end_pos = subsequence_start_pos + subsequence_desc.num_tokens; + subsequence_begins.push_back(subsequence_end_pos); + + int subsequence_length = subsequence_desc.num_tokens + subsequence_desc.past_len; + int required_blocks = ceil_div(subsequence_length, block_size); + int start_block_idx = block_indices.empty() ?
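
(Aside: a worked example, with invented sizes, of the bookkeeping this constructor fills in — two subsequences and block_size = 4:)

#include <vector>

static int ceil_div_i(int a, int b) { return (a + b - 1) / b; }

int main() {
    struct { int num_tokens, past_len; } subs[2] = {{3, 5}, {2, 0}};
    std::vector<int> past_lens, subsequence_begins{0}, block_indices_begins{0};
    int next_block = 0;
    for (const auto& s : subs) {
        past_lens.push_back(s.past_len);                                          // {5, 0}
        subsequence_begins.push_back(subsequence_begins.back() + s.num_tokens);   // {0, 3, 5}
        next_block += ceil_div_i(s.num_tokens + s.past_len, 4);                   // 8 tokens -> 2 blocks, then 2 -> 1
        block_indices_begins.push_back(next_block);                               // {0, 2, 3}
    }
    return block_indices_begins.back() == 3 ? 0 : 1;  // 3 cache blocks in total
}
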
+            int start_block_idx = block_indices.empty() ? 0 : block_indices.back() + 1;
+            int end_block_idx = start_block_idx + required_blocks;
+            for (int block_idx = start_block_idx; block_idx < end_block_idx; block_idx++) {
+                block_indices.push_back(block_idx);
+            }
+
+            int block_indices_start_pos = block_indices_begins[i];
+            int block_indices_end_pos = block_indices_start_pos + required_blocks;
+            block_indices_begins.push_back(block_indices_end_pos);
+        }
+        max_context_len.push_back(max_len);
+    }
+
+    memory::ptr get_query_memory() {
+        return get_QKV_memory(query_data, false);
+    }
+
+    memory::ptr get_key_memory() {
+        return get_QKV_memory(key_data, true);
+    }
+
+    memory::ptr get_value_memory() {
+        return get_QKV_memory(value_data, true);
+    }
+
+    memory::ptr get_key_cache_memory() {
+        auto num_blocks = block_indices.back() + 1;
+        auto key_cache_shape = ov::PartialShape{ num_blocks, num_heads, head_size, block_size };
+        auto key_cache_layout = layout{ key_cache_shape, data_types::f16, format::bfyx };
+        auto memory = test_engine.allocate_memory(key_cache_layout);
+
+        for (int i = 0; i < static_cast<int>(subsequence_descs.size()); i++) {
+            int past_len = subsequence_descs[i].past_len;
+            if (past_len != 0) {
+                int blocks_num = ceil_div(past_len, block_size);
+                int start_block_idx = block_indices[block_indices_begins[i]];
+                for (int block_idx = 0; block_idx < blocks_num; block_idx++) {
+                    int last_token_idx = block_idx == blocks_num - 1 ? past_len % block_size
+                                                                     : block_size;
+                    for (int token_idx = 0; token_idx < last_token_idx; token_idx++) {
+                        for (int head_idx = 0; head_idx < num_heads; head_idx++) {
+                            for (int head_size_idx = 0; head_size_idx < head_size; head_size_idx++) {
+                                size_t input_token_offset = block_idx * block_size + token_idx;
+                                ov::float16* data_ptr = key_data[i].data() +
+                                                        input_token_offset * num_heads * head_size +
+                                                        head_idx * head_size + head_size_idx;
+
+                                // shape: [num_blocks, num_heads, head_size, block_size]
+                                size_t output_offset = (start_block_idx + block_idx) * num_heads * head_size * block_size +
+                                                       head_idx * head_size * block_size +
+                                                       head_size_idx * block_size +
+                                                       token_idx;
+
+                                set_values(test_stream, memory, data_ptr, 1, output_offset);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return memory;
+    }
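+    // Editorial note (not part of the original patch): key_cache keeps block_size as
+    // its innermost dimension, so one token's key values are scattered column-wise and
+    // have to be written element by element above, while value_cache below stores whole
+    // tokens contiguously ([.., block_size, head_size]) and can be filled with a single
+    // head_size-sized copy per token.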
+
+    memory::ptr get_value_cache_memory() {
+        auto num_blocks = block_indices.back() + 1;
+        auto value_cache_shape = ov::PartialShape{ num_blocks, num_heads, block_size, head_size };
+        auto value_cache_layout = layout{ value_cache_shape, data_types::f16, format::bfyx };
+        auto memory = test_engine.allocate_memory(value_cache_layout);
+
+        for (int i = 0; i < static_cast<int>(subsequence_descs.size()); i++) {
+            int past_len = subsequence_descs[i].past_len;
+            if (past_len != 0) {
+                int blocks_num = ceil_div(past_len, block_size);
+                int start_block_idx = block_indices[block_indices_begins[i]];
+                for (int block_idx = 0; block_idx < blocks_num; block_idx++) {
+                    int last_token_idx = block_idx == blocks_num - 1 ? past_len % block_size
+                                                                     : block_size;
+                    for (int token_idx = 0; token_idx < last_token_idx; token_idx++) {
+                        for (int head_idx = 0; head_idx < num_heads; head_idx++) {
+                            size_t input_token_offset = block_idx * block_size + token_idx;
+                            ov::float16* data_ptr = value_data[i].data() +
+                                                    input_token_offset * num_heads * head_size +
+                                                    head_idx * head_size;
+
+                            // shape: [num_blocks, num_heads, block_size, head_size]
+                            size_t output_offset = (start_block_idx + block_idx) * num_heads * block_size * head_size +
+                                                   head_idx * block_size * head_size +
+                                                   token_idx * head_size;
+
+                            set_values(test_stream, memory, data_ptr, head_size, output_offset);
+                        }
+                    }
+                }
+            }
+        }
+
+        return memory;
+    }
+
+    memory::ptr get_past_lens_memory() {
+        return get_memory_from_vec(past_lens);
+    }
+
+    memory::ptr get_subsequence_begins_memory() {
+        return get_memory_from_vec(subsequence_begins);
+    }
+
+    memory::ptr get_block_indices_memory() {
+        return get_memory_from_vec(block_indices);
+    }
+
+    memory::ptr get_block_indices_begins_memory() {
+        return get_memory_from_vec(block_indices_begins);
+    }
+
+    memory::ptr get_scale_memory() {
+        std::vector<ov::float16> scale = { ov::float16(get_default_scale()) };
+        return get_memory_from_vec(scale);
+    }
+
+    memory::ptr get_sliding_window_memory() {
+        std::vector<int> sliding_window = { 0 };
+        return get_memory_from_vec(sliding_window);
+    }
+
+    memory::ptr get_alibi_memory() {
+        std::vector<ov::float16> alibi;
+        return get_memory_from_vec(alibi);
+    }
+
+    memory::ptr get_max_context_len_memory() {
+        return get_memory_from_vec(max_context_len);
+    }
+
+    float get_default_scale() {
+        return static_cast<float>(1.f / std::sqrt(head_size));
+    }
+
+private:
+    template <typename T>
+    memory::ptr get_memory_from_vec(std::vector<T>& input_data) {
+        auto data_size = input_data.empty() ? 1 : input_data.size();
+        auto shape = ov::PartialShape{ static_cast<int64_t>(data_size) };
+        auto layout = cldnn::layout{ shape, ov::element::from<T>(), format::bfyx };
+        auto memory = test_engine.allocate_memory(layout);
+
+        if (input_data.empty()) {
+            auto shape = ov::PartialShape{0};
+            auto layout = cldnn::layout{ shape, ov::element::from<T>(), format::bfyx };
+            return test_engine.reinterpret_buffer(*memory, layout);
+        }
+
+        set_values(test_stream, memory, input_data.data(), input_data.size(), 0);
+
+        return memory;
+    }
+
+    memory::ptr get_QKV_memory(std::vector<std::vector<ov::float16>>& input_data, bool skip_past_len) {
+        int total_tokens = 0;
+        for (const auto& subsequence_desc : subsequence_descs)
+            total_tokens += subsequence_desc.num_tokens;
+
+        auto query_shape = ov::PartialShape{ total_tokens, num_heads * head_size };
+        auto query_layout = layout{ query_shape, data_types::f16, format::bfyx };
+        auto memory = test_engine.allocate_memory(query_layout);
+
+        for (int subsequence_idx = 0; subsequence_idx < static_cast<int>(subsequence_descs.size()); subsequence_idx++) {
+            for (int token_idx = 0; token_idx < subsequence_descs[subsequence_idx].num_tokens; token_idx++) {
+                for (int head_idx = 0; head_idx < num_heads; head_idx++) {
+                    size_t input_token_offset = token_idx;
+                    // as generated data stored in vectors includes past_len, ignore it for KV inputs
+                    if (skip_past_len)
+                        input_token_offset += subsequence_descs[subsequence_idx].past_len;
+
+                    ov::float16* data_ptr = input_data[subsequence_idx].data() +
+                                            input_token_offset * num_heads * head_size +
+                                            head_idx * head_size;
+
+                    size_t output_token_offset = subsequence_begins[subsequence_idx] + token_idx;
+                    size_t output_offset = output_token_offset * num_heads * head_size +
+                                           head_idx * head_size;
+
+                    set_values(test_stream, memory, data_ptr, head_size, 
output_offset);
+                }
+            }
+        }
+
+        return memory;
+    }
+
+    template <typename T>
+    static void set_values(stream& stream, memory::ptr mem, T* vals, size_t size, size_t dst_offset) {
+        mem_lock<T> mem_ptr(mem, stream);
+        for (size_t i = 0; i < size; i++) {
+            mem_ptr[dst_offset + i] = vals[i];
+        }
+    }
+
+    static std::vector<ov::float16> generate_input_data(tests::random_generator& rg, size_t num_heads, size_t tokens_num, size_t head_size) {
+        const size_t total_elements_num = tokens_num * num_heads * head_size;
+        auto data = rg.generate_random_1d<ov::float16>(total_elements_num, -1, 1);
+
+        return data;
+    }
+};
+
+struct PagedAttentionReference {
+    PagedAttentionReference(PagedAttentionManager& pam)
+        : pam(pam)
+        , test_engine(pam.test_engine)
+        , test_stream(pam.test_stream) {}
+
+    std::pair<std::vector<ov::float16>, std::vector<ov::float16>> get_reference() {
+        std::vector<ov::float16> ref_data_output;
+        std::vector<ov::float16> ref_scores_output;
+
+        for (size_t i = 0; i < pam.subsequence_descs.size(); i++) {
+            const auto& subsequence_desc = pam.subsequence_descs[i];
+            const auto kv_seq_len = subsequence_desc.num_tokens + subsequence_desc.past_len;
+            auto subsequence_ref_results = run_reference(pam.query_data[i],
+                                                         pam.key_data[i],
+                                                         pam.value_data[i],
+                                                         subsequence_desc.num_tokens,
+                                                         kv_seq_len,
+                                                         pam.num_heads,
+                                                         pam.head_size,
+                                                         pam.get_default_scale());
+
+            // concatenate all subsequences into one vector
+            ref_data_output.insert(ref_data_output.end(),
+                                   subsequence_ref_results.first.begin(),
+                                   subsequence_ref_results.first.end());
+            ref_scores_output.insert(ref_scores_output.end(),
+                                     subsequence_ref_results.second.begin(),
+                                     subsequence_ref_results.second.end());
+        }
+
+        return { ref_data_output, ref_scores_output };
+    }
+
+private:
+    std::pair<std::vector<ov::float16>, std::vector<ov::float16>>
+    run_reference(const std::vector<ov::float16>& query_data,
+                  const std::vector<ov::float16>& key_data,
+                  const std::vector<ov::float16>& value_data,
+                  int num_queries,
+                  int num_keys,
+                  int num_heads,
+                  int head_size,
+                  float scale) {
+        auto query_shape = ov::PartialShape{1, num_queries, num_heads, head_size};
+        auto key_shape = ov::PartialShape{1, num_keys, num_heads, head_size};
+        auto value_shape = ov::PartialShape{1, num_keys, num_heads, head_size};
+
+        auto query_layout = layout{query_shape, data_types::f16, format::bfyx};
+        auto key_layout = layout{key_shape, data_types::f16, format::bfyx};
+        auto value_layout = layout{value_shape, data_types::f16, format::bfyx};
+
+        OPENVINO_ASSERT(query_layout.count() == query_data.size());
+        OPENVINO_ASSERT(key_layout.count() == key_data.size());
+        OPENVINO_ASSERT(value_layout.count() == value_data.size());
+
+        auto query_mem = test_engine.allocate_memory(query_layout);
+        auto key_mem = test_engine.allocate_memory(key_layout);
+        auto value_mem = test_engine.allocate_memory(value_layout);
+        auto mask_mem = get_mask_mem(num_queries, num_keys, num_heads);
+
+        set_values(query_mem, query_data);
+        set_values(key_mem, key_data);
+        set_values(value_mem, value_data);
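+        // Editorial note (not part of the original patch): the reference below is a
+        // plain-primitive SDPA, softmax(Q * K^T * scale + mask) * V. The permutes
+        // bring Q/K/V to [batch, heads, tokens, head_size], the first gemm with
+        // transpose_b=true yields scaled Q*K^T, eltwise adds the causal mask, and
+        // the second gemm applies the softmax result to V.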
+        topology topology;
+        topology.add(input_layout("query", query_layout),
+                     input_layout("key", key_layout),
+                     input_layout("value", value_layout),
+                     data("mask", mask_mem),
+                     permute("query_transposed", input_info("query"), {0, 2, 1, 3}),
+                     permute("key_transposed", input_info("key"), {0, 2, 1, 3}),
+                     permute("value_transposed", input_info("value"), {0, 2, 1, 3}),
+                     gemm("qk_gemm", { input_info("query_transposed"), input_info("key_transposed") }, data_types::f16, false, true, scale),
+                     eltwise("eltwise", { input_info("qk_gemm"), input_info("mask") }, eltwise_mode::sum),
+                     softmax("softmax", input_info("eltwise"), -1),
+                     gemm("qkv_gemm", { input_info("softmax"), input_info("value_transposed") }, data_types::f16, false, false),
+                     permute("qkv_gemm_transposed", input_info("qkv_gemm"), {0, 2, 1, 3}),
+                     reorder("output_data", input_info("qkv_gemm_transposed"), format::bfyx, data_types::f16),
+                     reorder("scores_data", input_info("softmax"), format::bfyx, data_types::f16)
+        );
+
+        ExecutionConfig config = get_test_default_config(test_engine);
+        config.set_property(ov::intel_gpu::optimize_data(true));
+        config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+
+        network::ptr network = get_network(test_engine, topology, config, get_test_stream_ptr(), false);
+        network->set_input_data("query", query_mem);
+        network->set_input_data("key", key_mem);
+        network->set_input_data("value", value_mem);
+
+        auto outputs = network->execute();
+
+        auto output_data_mem = outputs.at("output_data").get_memory();
+        auto output_scores_mem = outputs.at("scores_data").get_memory();
+
+        return { get_output_data_vec(output_data_mem, num_queries, head_size, num_heads),
+                 get_output_scores_vec(output_scores_mem, num_queries, num_keys, num_heads) };
+    }
+
+    std::vector<ov::float16> get_output_scores_vec(memory::ptr scores_output,
+                                                   int num_queries,
+                                                   int num_keys,
+                                                   int num_heads) {
+        OPENVINO_ASSERT(scores_output->count() == static_cast<size_t>(num_heads * num_queries * num_keys));
+
+        std::vector<ov::float16> output_scores(num_keys, 0);
+        mem_lock<ov::float16> mem_ptr(scores_output, test_stream);
+        for (int head_idx = 0; head_idx < num_heads; head_idx++) {
+            for (int score_idx = 0; score_idx < num_keys; score_idx++) {
+                output_scores[score_idx] += mem_ptr[head_idx * num_queries * num_keys +
+                                                    (num_queries - 1) * num_keys +
+                                                    score_idx];
+            }
+        }
+
+        return output_scores;
+    }
+
+    std::vector<ov::float16> get_output_data_vec(memory::ptr data_output,
+                                                 int num_queries,
+                                                 int head_size,
+                                                 int num_heads) {
+        OPENVINO_ASSERT(data_output->count() == static_cast<size_t>(num_queries * num_heads * head_size));
+
+        std::vector<ov::float16> output_data(data_output->count());
+        mem_lock<ov::float16> mem_ptr(data_output, test_stream);
+        for (size_t i = 0; i < data_output->count(); i++)
+            output_data[i] = mem_ptr[i];
+
+        return output_data;
+    }
+
+    memory::ptr get_mask_mem(int num_queries, int num_keys, int num_heads) {
+        /*
+         * Two kinds of masks:
+         *
+         * Case 1 (N == K):
+         * num_queries = N
+         * num_keys = K = N
+         * head_size = H
+         * Q [N, H] * K[H, N]
+         * QK [N, N]
+         *     0    1   ..   N
+         * 0 [ 0,  MIN, .., MIN ]
+         * 1 [ 0,   0,  .., MIN ]
+         *   [ ..,  .., .., MIN ]
+         * N [ 0,   0,  ..,  0  ]
+         *
+         * Case 2 (N != K):
+         * num_queries = N
+         * num_keys = K
+         * head_size = H
+         * past_len = P = K - N + 1
+         * Q [N, H] * K[H, K]
+         * QK [N, K]
+         *     0    1    2    P   ..   K
+         * 0 [ 0,   0,   0,  MIN, MIN, MIN ]
+         * 1 [ 0,   0,   0,   0,  MIN, MIN ]
+         *   [ ..,  ..,  ..,  ..,  .., MIN ]
+         * N [ 0,   0,   0,   0,  ..,  0  ]
+         *
+         * Shapes:
+         * Q [1, num_heads, num_queries, head_size]
+         * K [1, num_heads, head_size, num_keys]
+         * Q*K [1, num_heads, num_queries, num_keys]
+         */
+
+        auto mask_shape = ov::PartialShape{ 1, 1, num_queries, num_keys };
+        auto mask_layout = layout{mask_shape, data_types::f16, format::bfyx};
+        auto mask_mem = test_engine.allocate_memory(mask_layout);
+
+        int past_len = num_keys - num_queries + 1;
+        mem_lock<ov::float16> mem_ptr(mask_mem, test_stream);
+        for (int i = 0; i < num_queries; i++) {
+            for (int j = 0; j < num_keys; j++) {
+                mem_ptr[i * num_keys + j] = j >= past_len + i ? 
std::numeric_limits<ov::float16>::lowest()
+                                                              : ov::float16(0.f);
+            }
+        }
+
+        return mask_mem;
+    }
+
+    PagedAttentionManager& pam;
+    cldnn::engine& test_engine;
+    cldnn::stream& test_stream;
+};
+
+template <typename T>
+struct PagedAttentionTest : public ::testing::TestWithParam<T> {
+public:
+    random_generator rg;
+    cldnn::engine& engine = get_test_engine();
+    float tolerance = 2e-3;
+
+    void SetUp() override {
+        rg.set_seed(GET_SUITE_NAME);
+    }
+
+    void execute(T& p) {
+        PagedAttentionManager pam(rg, get_test_engine(), get_test_stream(), p.subsequences, p.num_heads, p.head_size, p.block_size);
+
+        auto query_mem = pam.get_query_memory();
+        auto key_mem = pam.get_key_memory();
+        auto value_mem = pam.get_value_memory();
+
+        auto key_cache_mem = pam.get_key_cache_memory();
+        auto value_cache_mem = pam.get_value_cache_memory();
+
+        auto past_lens_mem = pam.get_past_lens_memory();
+        auto subsequence_begins_mem = pam.get_subsequence_begins_memory();
+        auto block_indices_mem = pam.get_block_indices_memory();
+        auto block_indices_begins_mem = pam.get_block_indices_begins_memory();
+
+        auto scale_mem = pam.get_scale_memory();
+        auto sliding_window_mem = pam.get_sliding_window_memory();
+        auto alibi_mem = pam.get_alibi_memory();
+        auto max_context_len_mem = pam.get_max_context_len_memory();
+
+        auto query_layout = query_mem->get_layout();
+        auto key_layout = key_mem->get_layout();
+        auto value_layout = value_mem->get_layout();
+        auto key_cache_layout = key_cache_mem->get_layout();
+        auto value_cache_layout = value_cache_mem->get_layout();
+        auto past_lens_layout = past_lens_mem->get_layout();
+        auto subsequence_begins_layout = subsequence_begins_mem->get_layout();
+        auto block_indices_layout = block_indices_mem->get_layout();
+        auto block_indices_begins_layout = block_indices_begins_mem->get_layout();
+        auto scale_layout = scale_mem->get_layout();
+        auto sliding_window_layout = sliding_window_mem->get_layout();
+        auto alibi_layout = alibi_mem->get_layout();
+        auto max_context_len_layout = max_context_len_mem->get_layout();
+
+        // make layouts dynamic
+        query_layout.set_partial_shape(ov::PartialShape{ -1, p.num_heads * p.head_size });
+        key_layout.set_partial_shape(ov::PartialShape{ -1, p.num_heads * p.head_size });
+        value_layout.set_partial_shape(ov::PartialShape{ -1, p.num_heads * p.head_size });
+        key_cache_layout.set_partial_shape(ov::PartialShape{ -1, p.num_heads, p.head_size, p.block_size });
+        value_cache_layout.set_partial_shape(ov::PartialShape{ -1, p.num_heads, p.block_size, p.head_size });
+        past_lens_layout.set_partial_shape(ov::PartialShape{ -1 });
+        subsequence_begins_layout.set_partial_shape(ov::PartialShape{ -1 });
+        block_indices_layout.set_partial_shape(ov::PartialShape{ -1 });
+        block_indices_begins_layout.set_partial_shape(ov::PartialShape{ -1 });
+
+        auto pa_prim = paged_attention("paged_attention", { input_info("query"),
+                                                            input_info("key"),
+                                                            input_info("value"),
+                                                            input_info("key_cache"),
+                                                            input_info("value_cache"),
+                                                            input_info("past_lens"),
+                                                            input_info("subsequence_begins"),
+                                                            input_info("block_indices"),
+                                                            input_info("block_indices_begins"),
+                                                            input_info("scale"),
+                                                            input_info("sliding_window"),
+                                                            input_info("alibi"),
+                                                            input_info("max_context_len") });
+
+        pa_prim.head_size = p.head_size;
+        pa_prim.kv_heads_num = p.num_heads;
+        pa_prim.heads_num = p.num_heads;
+        pa_prim.scale_val = pam.get_default_scale();
+        pa_prim.has_alibi = false;
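+        // Editorial note (not part of the original patch): with scores_output set the
+        // primitive is created with two outputs; output 1 carries the per-token softmax
+        // scores that the test reads back through the "output_scores" reorder below.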
+        pa_prim.num_outputs = p.scores_output ? 2 : 1;
+
+        topology topology;
+        topology.add(
+            input_layout("query", query_layout),
+            input_layout("key", key_layout),
+            input_layout("value", value_layout),
+            input_layout("key_cache", key_cache_layout),
+            input_layout("value_cache", value_cache_layout),
+            input_layout("past_lens", past_lens_layout),
+            input_layout("subsequence_begins", subsequence_begins_layout),
+            input_layout("block_indices", block_indices_layout),
+            input_layout("block_indices_begins", block_indices_begins_layout),
+            input_layout("scale", scale_layout),
+            input_layout("sliding_window", sliding_window_layout),
+            input_layout("alibi", alibi_layout),
+            input_layout("max_context_len", max_context_len_layout),
+            pa_prim,
+            reorder("output_data", input_info("paged_attention", 0), format::bfyx, data_types::f16)
+        );
+
+        if (p.scores_output) {
+            topology.add(reorder("output_scores", input_info("paged_attention", 1), format::bfyx, data_types::f16));
+        }
+
+        ExecutionConfig config = get_test_default_config(get_test_engine());
+        config.set_property(ov::intel_gpu::optimize_data(true));
+        config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+
+        network::ptr network = get_network(get_test_engine(), topology, config, get_test_stream_ptr(), false);
+        network->set_input_data("query", query_mem);
+        network->set_input_data("key", key_mem);
+        network->set_input_data("value", value_mem);
+        network->set_input_data("key_cache", key_cache_mem);
+        network->set_input_data("value_cache", value_cache_mem);
+        network->set_input_data("past_lens", past_lens_mem);
+        network->set_input_data("subsequence_begins", subsequence_begins_mem);
+        network->set_input_data("block_indices", block_indices_mem);
+        network->set_input_data("block_indices_begins", block_indices_begins_mem);
+        network->set_input_data("scale", scale_mem);
+        network->set_input_data("sliding_window", sliding_window_mem);
+        network->set_input_data("alibi", alibi_mem);
+        network->set_input_data("max_context_len", max_context_len_mem);
+
+        auto outputs = network->execute();
+
+        cldnn::memory::ptr output_data_mem = nullptr;
+        cldnn::memory::ptr output_scores_mem = nullptr;
+
+        output_data_mem = outputs.at("output_data").get_memory();
+        if (p.scores_output) {
+            output_scores_mem = outputs.at("output_scores").get_memory();
+        }
+
+        auto ref_data = PagedAttentionReference(pam).get_reference();
+        compare(output_data_mem, output_scores_mem, ref_data);
+    }
+
+    void compare(memory::ptr data_output_mem, memory::ptr scores_output_mem, std::pair<std::vector<ov::float16>, std::vector<ov::float16>> ref_data) {
+        if (data_output_mem) {
+            ASSERT_EQ(data_output_mem->count(), ref_data.first.size());
+            mem_lock<ov::float16> mem_ptr(data_output_mem, get_test_stream());
+            for (size_t i = 0; i < data_output_mem->count(); i++) {
+                ASSERT_NEAR(mem_ptr[i], ref_data.first[i], tolerance);
+            }
+        }
+
+        if (scores_output_mem) {
+            ASSERT_EQ(scores_output_mem->count(), ref_data.second.size());
+            mem_lock<ov::float16> mem_ptr(scores_output_mem, get_test_stream());
+            for (size_t i = 0; i < scores_output_mem->count(); i++) {
+                ASSERT_NEAR(mem_ptr[i], ref_data.second[i], tolerance);
+            }
+        }
+    }
+};
+
+struct paged_attention_test_params {
+    std::vector<SubsequenceDescriptor> subsequences;
+    int num_heads;
+    int head_size;
+    int block_size;
+    bool scores_output;
+};
+
+class paged_attention_test : public PagedAttentionTest<paged_attention_test_params> {};
+TEST_P(paged_attention_test, basic) {
+    auto p = GetParam();
+
+    execute(p);
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_paged_attention, paged_attention_test, ::testing::ValuesIn(std::vector<paged_attention_test_params>{
+    /* with scores output */
+    paged_attention_test_params{ {{10, 0}}, 2, 64, 16, true },  // 
1st token
+    paged_attention_test_params{ {{36, 0}}, 2, 64, 16, true },                    // 1st token
+    paged_attention_test_params{ {{1024, 0}}, 2, 64, 16, true },                  // 1st token long
+    paged_attention_test_params{ {{10, 0}, {30, 0}}, 2, 64, 16, true },           // 1st token + 1st token
+    paged_attention_test_params{ {{128, 0}, {256, 0}}, 2, 64, 16, true },         // 1st token + 1st token
+    paged_attention_test_params{ {{1, 10}}, 2, 64, 16, true },                    // 2nd token
+    paged_attention_test_params{ {{1, 34}, {1, 515}}, 2, 64, 16, true },          // 2nd token + 2nd token
+    paged_attention_test_params{ {{1, 34}, {25, 0}, {10, 34}}, 2, 64, 16, true }, // mixed: 2nd token + 1st token + part of 1st token
+    /* without scores output */
+    paged_attention_test_params{ {{10, 0}}, 2, 64, 16, false },                   // 1st token
+    paged_attention_test_params{ {{1024, 0}}, 2, 64, 16, false },                 // 1st token long
+    paged_attention_test_params{ {{1, 34}, {1, 515}}, 2, 64, 16, false },         // 2nd token + 2nd token
+}));
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
index 8ade3b6c8e0f31..0f9f119f275a78 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
@@ -2467,6 +2467,99 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16_padded)
     }
 }
 
+TEST(reorder_gpu_f32, bfzyx_to_bfyx_padded) {
+    tests::random_generator rg(GET_SUITE_NAME);
+    auto& engine = get_test_engine();
+
+    const int32_t b_in = 1024;
+    const int32_t f_in = 64;
+    const int32_t x_in = 72;
+    const int32_t y_in = 2;
+    const int32_t z_in = 3;
+
+    const int32_t b_crop = 1024;
+    const int32_t f_crop = 64;
+    const int32_t x_crop = 72;
+    const int32_t y_crop = 2;
+    const int32_t z_crop = 1;
+
+    const int32_t z0_off = 0;
+    const int32_t z1_off = 1;
+    const int32_t z2_off = 2;
+
+    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { b_in, f_in, x_in, y_in, z_in } });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(crop("crop0", input_info("input"), { b_crop, f_crop, x_crop, y_crop, z_crop }, { 0, 0, 0, 0, z0_off }));
+    topology.add(crop("crop1", input_info("input"), { b_crop, f_crop, x_crop, y_crop, z_crop }, { 0, 0, 0, 0, z1_off }));
+    topology.add(crop("crop2", input_info("input"), { b_crop, f_crop, x_crop, y_crop, z_crop }, { 0, 0, 0, 0, z2_off }));
+    topology.add(reorder("reorder0", input_info("crop0"), format::bfyx, data_types::f32));
+    topology.add(reorder("reorder1", input_info("crop1"), format::bfyx, data_types::f32));
+    topology.add(reorder("reorder2", input_info("crop2"), format::bfyx, data_types::f32));
+    topology.add(reshape("reshape0", input_info("reorder0"), tensor(batch(b_in), feature(y_in), spatial(x_in, f_in))));
+    topology.add(reshape("reshape1", input_info("reorder1"), tensor(batch(b_in), feature(y_in), spatial(x_in, f_in))));
+    topology.add(reshape("reshape2", input_info("reorder2"), tensor(batch(b_in), feature(y_in), spatial(x_in, f_in))));
+
+    std::vector<float> input_vec = rg.generate_random_1d<float>(input->count(), -10, 10);
+    set_values(input, input_vec);
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    network network(engine, topology, config);
+
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+    auto output0 = outputs.at("reshape0").get_memory();
+    auto output1 = outputs.at("reshape1").get_memory();
+    auto output2 = outputs.at("reshape2").get_memory();
+
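+    // Editorial note (not part of the original patch): each check below recomputes the
+    // source index with the z-offset of its crop folded in (z + zN_off) against the
+    // bfzyx input strides, and compares it with the bfyx output index built from the
+    // crop dimensions, so the three loops validate that crop + reorder + reshape
+    // preserved every element.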
+    cldnn::mem_lock<float> output_ptr0(output0, get_test_stream());
+    for (int b = 0; b < b_crop; ++b) {
+        for (int f = 0; f < f_crop; ++f) {
+            for (int z = 0; z < z_crop; ++z) {
+                for (int y = 0; y < y_crop; ++y) {
+                    for (int x = 0; x < x_crop; ++x) {
+                        int linear_id = x + x_in * (y + y_in * (z + z0_off + z_in * (f + f_in * b)));
+                        int output_linear_id = x + x_crop * (y + y_crop * (z + z_crop * (f + f_crop * b)));
+                        ASSERT_EQ(output_ptr0[output_linear_id], input_vec[linear_id]);
+                    }
+                }
+            }
+        }
+    }
+
+    cldnn::mem_lock<float> output_ptr1(output1, get_test_stream());
+    for (int b = 0; b < b_crop; ++b) {
+        for (int f = 0; f < f_crop; ++f) {
+            for (int z = 0; z < z_crop; ++z) {
+                for (int y = 0; y < y_crop; ++y) {
+                    for (int x = 0; x < x_crop; ++x) {
+                        int linear_id = x + x_in * (y + y_in * (z + z1_off + z_in * (f + f_in * b)));
+                        int output_linear_id = x + x_crop * (y + y_crop * (z + z_crop * (f + f_crop * b)));
+                        ASSERT_EQ(output_ptr1[output_linear_id], input_vec[linear_id]);
+                    }
+                }
+            }
+        }
+    }
+
+    cldnn::mem_lock<float> output_ptr2(output2, get_test_stream());
+    for (int b = 0; b < b_crop; ++b) {
+        for (int f = 0; f < f_crop; ++f) {
+            for (int z = 0; z < z_crop; ++z) {
+                for (int y = 0; y < y_crop; ++y) {
+                    for (int x = 0; x < x_crop; ++x) {
+                        int linear_id = x + x_in * (y + y_in * (z + z2_off + z_in * (f + f_in * b)));
+                        int output_linear_id = x + x_crop * (y + y_crop * (z + z_crop * (f + f_crop * b)));
+                        ASSERT_EQ(output_ptr2[output_linear_id], input_vec[linear_id]);
+                    }
+                }
+            }
+        }
+    }
+}
+
 TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_allowed) {
     auto& engine = get_test_engine();
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index 260a1c444284cb..eb13bc8b5bd1d9 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -5,14 +5,208 @@
 #include "llm_infer_request.hpp"
 #include "logging.hpp"
+#include "openvino/op/ops.hpp"
+#include "openvino/openvino.hpp"
+#include "openvino/opsets/opset13.hpp"
+#include "openvino/pass/graph_rewrite.hpp"
+#include "openvino/pass/matcher_pass.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "openvino/pass/stateful_to_stateless.hpp"
+#include "openvino/pass/validate.hpp"
 #include "openvino/runtime/iasync_infer_request.hpp"
 
+namespace opp = ov::pass::pattern;
+class TransposeValueTensors : public ov::pass::MatcherPass {
+public:
+    struct Context {
+        std::vector<std::shared_ptr<ov::op::v0::Parameter>> new_params;
+        std::vector<std::shared_ptr<ov::op::v0::Parameter>> old_params;
+        using Ref = std::reference_wrapper<Context>;
+    };
+
+    OPENVINO_MATCHER_PASS_RTTI("npuw::LLMCompiledModel::TransposeValueTensors");
+    TransposeValueTensors(Context::Ref ctx) {
+        auto param = opp::wrap_type<ov::op::v0::Parameter>();
+        auto transpose = opp::wrap_type<ov::op::v1::Transpose>({opp::any_input(), opp::any_input()});
+        auto concat = opp::wrap_type<ov::op::v0::Concat>({param, transpose});
+        auto softmax = opp::wrap_type<ov::op::v8::Softmax>({opp::any_input()});
+        auto matmul = opp::wrap_type<ov::op::v0::MatMul>({softmax, concat});
+
+        auto callback = [=](ov::pass::pattern::Matcher& m) {
+            auto& node_to_output = m.get_pattern_value_map();
+
+            auto matched_node_param = node_to_output.at(param).get_node_shared_ptr();
+            auto matched_node_concat = node_to_output.at(concat).get_node_shared_ptr();
+            auto matched_node_transpose = node_to_output.at(transpose).get_node_shared_ptr();
+            auto matched_node_matmul = node_to_output.at(matmul).get_node_shared_ptr();
+
+            auto matched_param = std::static_pointer_cast<ov::op::v0::Parameter>(matched_node_param);
+            auto matched_concat = std::static_pointer_cast<ov::op::v0::Concat>(matched_node_concat);
+            auto 
matched_transpose = std::static_pointer_cast<ov::op::v1::Transpose>(matched_node_transpose);
+            auto matched_matmul = std::static_pointer_cast<ov::op::v0::MatMul>(matched_node_matmul);
+
+            auto shape = matched_param->get_partial_shape();
+            OPENVINO_ASSERT(shape.size() == 4u);
+            // NB: Transpose the Parameter that corresponds to the V-tensor; it will
+            // speed up its multiplication with the attention scores
+            std::swap(shape[2], shape[3]);
+            auto new_param = std::make_shared<ov::op::v0::Parameter>(matched_param->get_element_type(), shape);
+            new_param->set_friendly_name(matched_param->get_friendly_name());
+            new_param->outputs().begin()->get_tensor().set_names(
+                matched_param->outputs().begin()->get_tensor().get_names());
+            ov::replace_node(matched_param, new_param);
+            // NB: Save in order to add/remove to the model later on
+            ctx.get().new_params.push_back(new_param);
+            ctx.get().old_params.push_back(matched_param);
+
+            auto order_cst = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{4}, {0, 2, 3, 1});
+            auto new_transpose =
+                std::make_shared<ov::op::v1::Transpose>(matched_transpose->input_value(0), order_cst->output(0));
+            new_transpose->set_friendly_name(matched_transpose->get_friendly_name());
+            ov::replace_node(matched_transpose, new_transpose);
+
+            auto new_concat =
+                std::make_shared<ov::op::v0::Concat>(ov::OutputVector{new_param->output(0), new_transpose->output(0)},
+                                                     3u);
+            new_concat->set_friendly_name(matched_concat->get_friendly_name());
+            ov::replace_node(matched_concat, new_concat);
+
+            matched_matmul->set_transpose_b(true);
+
+            return true;
+        };
+        register_matcher(std::make_shared<opp::Matcher>(matmul, "TransposeValueTensors"), std::move(callback));
+    }
+};
+
+class ScaledDotProductAttentionDecomposition : public ov::pass::MatcherPass {
+public:
+    OPENVINO_MATCHER_PASS_RTTI("npuw::LLMCompiledModel::ScaledDotProductAttentionDecomposition");
+    ScaledDotProductAttentionDecomposition() {
+        auto pattern_node = ov::pass::pattern::wrap_type<ov::op::v13::ScaledDotProductAttention>();
+
+        ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
+            auto& pattern_to_output = m.get_pattern_value_map();
+            auto node = ov::as_type_ptr<ov::op::v13::ScaledDotProductAttention>(
+                pattern_to_output.at(pattern_node).get_node_shared_ptr());
+
+            if (node == nullptr || transformation_callback(node)) {
+                return false;
+            }
+
+            auto new_output_node = decompose(node);
+            ov::replace_node(node, new_output_node);
+            return true;
+        };
+
+        auto m = std::make_shared<ov::pass::pattern::Matcher>(pattern_node, "ScaledDotProductAttentionDecomposition");
+        register_matcher(m, std::move(callback));
+    }
+    std::shared_ptr<ov::Node> decompose(std::shared_ptr<ov::op::v13::ScaledDotProductAttention> node) {
+        using namespace ov::op;
+        using namespace ov;
+        auto query = node->input_value(0);
+        auto key = node->input_value(1);
+        auto value = node->input_value(2);
+        auto q_shape = register_new_node<v3::ShapeOf>(query, element::i32);
+        auto k_shape = register_new_node<v3::ShapeOf>(key, element::i32);
+        auto minus_one = register_new_node(v0::Constant::create(element::i32, Shape{}, {-1}));
+        auto minus_two = register_new_node(v0::Constant::create(element::i32, Shape{}, {-2}));
+        auto zero_i = register_new_node(v0::Constant::create(element::i32, Shape{}, {0}));
+        auto one_i = register_new_node(v0::Constant::create(element::i32, Shape{}, {1}));
+        auto one_f = register_new_node<v1::ConvertLike>(one_i, query);
+        auto zero_f = register_new_node<v1::ConvertLike>(zero_i, query);
+
+        Output<Node> scale;
+        if (node->get_input_size() < 5) {
+            scale = register_new_node<v8::Gather>(q_shape, minus_one, zero_i)->output(0);
+            scale = register_new_node<v1::ConvertLike>(scale, query);
+            auto sqrt_scale = register_new_node<v0::Sqrt>(scale);
+            scale = register_new_node<v1::Divide>(one_f, sqrt_scale);
+        } else {
+            scale = node->input_value(4);
+        }
+
+        auto q_scaled = register_new_node<v1::Multiply>(query, scale);
+        auto k_rank = 
register_new_node<v3::ShapeOf>(k_shape, element::i32)->output(0);
+        auto k_last_dim = register_new_node<v1::Add>(k_rank, minus_one);
+        auto k_next_dim = register_new_node<v1::Add>(k_rank, minus_two)->output(0);
+        k_rank = register_new_node<v0::Squeeze>(k_rank, zero_i);
+        auto minus_inf =
+            register_new_node(v0::Constant::create(element::f32, Shape{}, {-std::numeric_limits<float>::infinity()}))
+                ->output(0);
+        auto keep_dim_last = register_new_node<v0::Squeeze>(k_next_dim, zero_i);
+        auto k_dims_before_transpose = register_new_node<v4::Range>(zero_i, keep_dim_last, one_i, element::i32);
+
+        auto scaled_atten = register_new_node<v0::MatMul>(q_scaled, key, false, true)->output(0);
+        minus_inf = register_new_node<v1::ConvertLike>(minus_inf, scaled_atten);
+
+        if (node->get_causal() || node->get_input_size() > 3) {
+            Output<Node> mask;
+            Output<Node> atten_mask;
+            if (!node->get_causal()) {
+                mask = node->input_value(3);
+
+                // two types of masks are supported. A boolean mask where a value of True indicates that the element
+                // should take part in attention. A float mask of the same type as query, key, value that is added to
+                // the attention score.
+                if (mask.get_element_type() == element::boolean) {
+                    atten_mask = register_new_node<v1::ConvertLike>(mask, scaled_atten);
+                    auto inv_mask = register_new_node<v1::LogicalNot>(mask);
+                    atten_mask = register_new_node<v1::Select>(inv_mask, atten_mask, minus_inf);
+                } else {
+                    atten_mask = mask;
+                }
+            } else {
+                auto target_s_len = register_new_node<v8::Gather>(q_shape, minus_two, zero_i);
+                auto source_s_len = register_new_node<v8::Gather>(k_shape, minus_two, zero_i);
+                auto ssl = register_new_node<v0::Unsqueeze>(source_s_len, zero_i);
+                auto tsl = register_new_node<v0::Unsqueeze>(target_s_len, zero_i);
+                auto mask_shape = register_new_node<v0::Concat>(OutputVector{tsl, ssl}, 0);
+                mask = register_new_node<v1::Broadcast>(minus_inf, mask_shape);
+                auto horizontal_range =
+                    register_new_node<v4::Range>(zero_i, source_s_len, one_i, element::i32)->output(0);
+                horizontal_range = register_new_node<v0::Unsqueeze>(horizontal_range, zero_i);
+                auto stop = register_new_node<v1::Add>(target_s_len, one_i);
+                auto vertical_range = register_new_node<v4::Range>(one_i, stop, one_i, element::i32)->output(0);
+                vertical_range = register_new_node<v0::Unsqueeze>(vertical_range, one_i);
+                auto triu = register_new_node<v1::GreaterEqual>(horizontal_range, vertical_range);
+                atten_mask = register_new_node<v1::Select>(triu, mask, zero_f);
+            }
+            scaled_atten = register_new_node<v1::Add>(scaled_atten, atten_mask);
+        }
+
+        scaled_atten = register_new_node<v8::Softmax>(scaled_atten, -1);
+        auto result = register_new_node<v0::MatMul>(scaled_atten, value);
+        result->set_friendly_name(node->get_friendly_name());
+        copy_runtime_info(node, get_new_nodes());
+        return result;
+    }
+};
+
 namespace {
 uint32_t align_to(uint32_t value, uint32_t alignment) {
     return (value + alignment - 1) & ~(alignment - 1);
 }
 
+std::shared_ptr<ov::Model> cvt_kvcache_to_fp16(const std::shared_ptr<ov::Model>& model) {
+    ov::preprocess::PrePostProcessor ppp(model);
+
+    for (const auto& tensor : model->inputs()) {
+        if (tensor.get_any_name().find("past_key") != std::string::npos) {
+            ppp.input(tensor.get_any_name()).tensor().set_element_type(ov::element::Type_t::f16);
+        }
+    }
+
+    for (const auto& tensor : model->outputs()) {
+        if (tensor.get_any_name().find("present") != std::string::npos) {
+            ppp.output(tensor.get_any_name()).tensor().set_element_type(ov::element::Type_t::f16);
+        }
+    }
+
+    return ppp.build();
+}
+
 std::shared_ptr<ov::Model> redirect_new_kv_to_output(const std::shared_ptr<ov::Model>& model) {
     const auto kStartOutputKVCacheLayers = 1u;
     for (std::size_t i = kStartOutputKVCacheLayers; i < model->outputs().size(); ++i) {
@@ -27,22 +221,33 @@ std::shared_ptr redirect_new_kv_to_output(const std::shared_ptr
-std::shared_ptr<ov::Model> cvt_kvcache_to_fp16(const std::shared_ptr<ov::Model>& model) {
+std::shared_ptr<ov::Model> 
cvt_value_tensors_layout(std::shared_ptr<ov::Model> model) {
     ov::preprocess::PrePostProcessor ppp(model);
-
-    for (const auto& tensor : model->inputs()) {
-        if (tensor.get_any_name().find("past_key") != std::string::npos) {
-            ppp.input(tensor.get_any_name()).tensor().set_element_type(ov::element::Type_t::f16);
+    for (auto tensor : model->outputs()) {
+        if (tensor.get_any_name().find("value") != std::string::npos) {
+            // NB: [batch, num_heads, seq_len, emb_size] -> [batch, num_heads, emb_size, seq_len]
+            ppp.output(tensor.get_any_name()).model().set_layout(ov::Layout("BHSE"));
+            ppp.output(tensor.get_any_name()).tensor().set_layout(ov::Layout("BHES"));
         }
     }
+    return ppp.build();
+}
 
-    for (const auto& tensor : model->outputs()) {
-        if (tensor.get_any_name().find("present") != std::string::npos) {
-            ppp.output(tensor.get_any_name()).tensor().set_element_type(ov::element::Type_t::f16);
-        }
+bool optimize_value_tensors(std::shared_ptr<ov::Model> model) {
+    ov::pass::GraphRewrite rewr;
+    rewr.add_matcher<ScaledDotProductAttentionDecomposition>();
+    TransposeValueTensors::Context ctx;
+    rewr.add_matcher<TransposeValueTensors>(std::ref(ctx));
+    rewr.run_on_model(model);
+
+    model->add_parameters(ctx.new_params);
+    for (auto old_param : ctx.old_params) {
+        model->remove_parameter(old_param);
     }
+    ov::pass::Validate().run_on_model(model);
 
-    return ppp.build();
+    // NB: if new_params is not empty - pass has been applied
+    return !ctx.new_params.empty();
 }
 
 struct KVAxesPosition {
@@ -116,32 +321,6 @@ std::optional extract_npu_descriptor(const std::shared_ptr
     return std::make_optional(NPUDesc{arch.as<std::string>(), max_tiles.as<long>()});
 }
 
-std::optional<ov::Any> pop_option(ov::AnyMap& config, const std::string& option_name) {
-    if (auto it = config.find(option_name); it != config.end()) {
-        std::optional<ov::Any> found = std::make_optional(it->second);
-        config.erase(it);
-        return found;
-    }
-    return std::nullopt;
-}
-
-template <typename T>
-std::optional<T> get_option(ov::AnyMap& config, const std::string& option_name) {
-    if (auto it = config.find(option_name); it != config.end()) {
-        return std::make_optional(it->second.as<T>());
-    }
-    return std::nullopt;
-}
-
-template <typename T>
-T pop_or_default(ov::AnyMap& config, const std::string& key, const T& default_value) {
-    auto anyopt = pop_option(config, key);
-    if (anyopt.has_value()) {
-        return anyopt.value().as<T>();
-    }
-    return default_value;
-}
-
 ov::AnyMap get_baseline_common_config() {
     ov::AnyMap config = {
         {"NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add_RMSNorm"},
@@ -206,12 +385,6 @@ void merge_config_with(ov::AnyMap& lhs, const ov::AnyMap& rhs) {
     }
 }
 
-void drop_cache_dir(ov::AnyMap& config) {
-    if (config.count("NPU_USE_NPUW") != 0u) {
-        pop_option(config, "CACHE_DIR");
-    }
-}
-
 void split_llm_properties(const ov::AnyMap& properties, ov::AnyMap& llm_properties, ov::AnyMap& other_properties) {
     for (auto it = properties.begin(); it != properties.end(); ++it) {
         if (it->first.find("NPUW_LLM") != it->first.npos) {
@@ -251,41 +424,48 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr& m
     auto kvcache_model = model->clone();
     LOG_DEBUG("2. Transform kvcache model from stateful to stateless.");
     ov::pass::StatefulToStateless().run_on_model(kvcache_model);
-
     LOG_DEBUG("3. Creating prefill model as clone of transformed kvcache one.");
     auto prefill_model = kvcache_model->clone();
     prefill_model->set_friendly_name(kvcache_model->get_friendly_name() + "_prefill");
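+    // Editorial note (not part of the original patch): the reordered steps below first
+    // reshape both models, then (step 6) try TransposeValueTensors on the kvcache
+    // model; when it fires, the prefill model's V-tensor outputs get the matching
+    // transposed layout via cvt_value_tensors_layout ("BHSE" model layout vs "BHES"
+    // tensor layout), so both models exchange V-caches as [batch, heads, emb, seq].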
-    LOG_DEBUG("4. Converting KV-cache in prefill model to FP16.");
-    prefill_model = cvt_kvcache_to_fp16(prefill_model);
-
-    LOG_DEBUG("5. Optimize kvcache kvcache model to output key/values for new token.");
-    kvcache_model = redirect_new_kv_to_output(kvcache_model);
-    LOG_DEBUG("6. Converting KV-cache in kvcache model to FP16.");
-    kvcache_model = cvt_kvcache_to_fp16(kvcache_model);
+    const ::intel_npu::npuw::llm::ModelDesc model_desc = m_cfg.get<::intel_npu::NPUW_LLM_MODEL_DESC>();
     const uint32_t kMaxPromptLen = align_to(m_cfg.get<::intel_npu::NPUW_LLM_MAX_PROMPT_LEN>(), 64u);
     const uint32_t kMinResponseLen = align_to(m_cfg.get<::intel_npu::NPUW_LLM_MIN_RESPONSE_LEN>(), 64u);
-    const ::intel_npu::npuw::llm::ModelDesc model_desc = m_cfg.get<::intel_npu::NPUW_LLM_MODEL_DESC>();
     KVAxesPosition axes = get_kv_axes(model_desc.type);
     m_kvcache_desc = KVCacheDesc{kMaxPromptLen, kMaxPromptLen + kMinResponseLen, 0u, axes.seq_len};
-    LOG_DEBUG("7. Make prefill model with static shapes");
+    LOG_DEBUG("4. Make prefill model with static shapes");
     reshape_to_static(prefill_model, m_kvcache_desc.max_prompt_size, m_kvcache_desc.max_prompt_size, axes);
-    LOG_DEBUG("8. Make kvcache model with static shapes");
+    LOG_DEBUG("5. Make kvcache model with static shapes");
     reshape_to_static(kvcache_model, 1u, m_kvcache_desc.total_size, axes);
+    LOG_DEBUG("6. Check and apply opt layout if applicable.");
+    // NB: Try to apply opt transpose only for Llama-2-7b-chat-hf model
+    if (model_desc.name_or_path == "meta-llama/Llama-2-7b-chat-hf" ||
+        (model_desc.type == "llama" && model_desc.num_key_value_heads == 32)) {
+        if (optimize_value_tensors(kvcache_model)) {
+            // NB: Check if TransposeValueTensors transformation was applied
+            m_kvcache_desc.v_tensors_transposed = true;
+            prefill_model = cvt_value_tensors_layout(prefill_model);
+        }
+    }
+    LOG_DEBUG("7. Optimize kvcache model to output key/values for new token.");
+    kvcache_model = redirect_new_kv_to_output(kvcache_model);
+    LOG_DEBUG("8. Converting KV-cache in kvcache model to FP16.");
+    kvcache_model = cvt_kvcache_to_fp16(kvcache_model);
+    LOG_DEBUG("9. Converting KV-cache in prefill model to FP16.");
+    prefill_model = cvt_kvcache_to_fp16(prefill_model);
     auto npudesc = extract_npu_descriptor(plugin);
-
-    ov::AnyMap properties_copy = std::move(other_props);
+    ov::AnyMap properties_copy = other_props;
     auto prefill_config = get_default_prefill_config(model, npudesc);
+    // NB: GENERATE_HINT is only applicable for default generate config!
     const ::intel_npu::npuw::llm::GenerateHint generate_hint = m_cfg.get<::intel_npu::NPUW_LLM_GENERATE_HINT>();
-    LOG_DEBUG("9. Passed GENERATE_HINT: " << std::string(::intel_npu::NPUW_LLM_GENERATE_HINT::toString(generate_hint)));
+    LOG_DEBUG(
+        "10. 
Passed GENERATE_HINT: " << std::string(::intel_npu::NPUW_LLM_GENERATE_HINT::toString(generate_hint)));
     auto generate_config = get_default_generate_config(model, npudesc, generate_hint);
+
     merge_config_with(prefill_config, properties_copy);
     merge_config_with(generate_config, properties_copy);
-    // FIXME: Drop CACHE_DIR option if NPUW is enabled
-    drop_cache_dir(prefill_config);
-    drop_cache_dir(generate_config);
 
     m_kvcache_compiled = std::make_shared<ov::npuw::CompiledModel>(kvcache_model, plugin, generate_config);
     m_prefill_compiled = std::make_shared<ov::npuw::CompiledModel>(prefill_model, plugin, prefill_config);
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp
index 1a748997fd48fa..e37a47b2c77948 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp
@@ -22,6 +22,7 @@ class LLMCompiledModel : public ov::npuw::ICompiledModel {
         uint32_t total_size = 0u;
         uint32_t num_stored_tokens = 0u;
         uint32_t dim = 0u;
+        bool v_tensors_transposed = false;
     };
 
     LLMCompiledModel(const std::shared_ptr<ov::Model>& model,
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
index a73478c0cab5d2..12f103cc0ab6a2 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
@@ -27,6 +27,36 @@ ov::SoPtr<ov::ITensor> make_tensor_slice(ov::SoPtr<ov::ITensor> tensor,
     end_shape[dim] = end_pos;
     return ov::get_tensor_impl(ov::Tensor(ov::make_tensor(tensor), start_shape, end_shape));
 }
+
+void copy_columns_by_row_chunks(ov::SoPtr<ov::ITensor> src, ov::SoPtr<ov::ITensor>& dst) {
+    const auto src_shape = src->get_shape();
+
+    OPENVINO_ASSERT(src_shape.size() == 4u);
+    OPENVINO_ASSERT(src_shape == dst->get_shape());
+    OPENVINO_ASSERT(src->get_byte_size() == dst->get_byte_size());
+
+    const auto src_strides = src->get_strides();
+    const auto dst_strides = dst->get_strides();
+    const auto elem_size = src->get_byte_size() / src->get_size();
+
+    const auto C = src_shape[1];
+    const auto H = src_shape[2];
+    const auto W = src_shape[3];
+
+    const auto IS_H = src_strides[2];
+    const auto OS_H = dst_strides[2];
+
+    const size_t chunk_byte_size = W * elem_size;
+
+    const auto* src_p = static_cast<uint8_t*>(src->data());
+    auto* dst_p = static_cast<uint8_t*>(dst->data());
+
+    for (size_t i = 0; i < C * H; ++i) {
+        const size_t src_offset = i * IS_H;
+        const size_t dst_offset = i * OS_H;
+        std::copy_n(src_p + src_offset, chunk_byte_size, dst_p + dst_offset);
+    }
+}
 }  // anonymous namespace
 
 ov::npuw::LLMInferRequest::LLMInferRequest(const std::shared_ptr<ov::npuw::LLMCompiledModel>& compiled_model,
@@ -116,17 +146,25 @@ void ov::npuw::LLMInferRequest::infer_generate(ov::SoPtr<ov::ITensor> input_ids,
         // taking into account kvcache dimension.
         fill_tensor<ov::float16>(kvcache_in_tensor, 0);
 
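+        // Editorial note (not part of the original patch): for transposed V-tensors the
+        // sequence axis moves to dim 3, so the plain ROI copy below would touch
+        // non-contiguous memory; copy_columns_by_row_chunks instead walks the C*H rows
+        // once and copies one W-sized chunk per row.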
+        const auto& kv_dim = (output_name.find("value") != std::string::npos && m_kvcache_desc.v_tensors_transposed)
+                                 ? 3u
+                                 : m_kvcache_desc.dim;
+
         auto prefill_out_slice = make_tensor_slice(prefill_out_tensor,
-                                                   m_kvcache_desc.dim,
+                                                   kv_dim,
                                                    m_kvcache_desc.max_prompt_size - m_kvcache_desc.num_stored_tokens,
                                                    m_kvcache_desc.max_prompt_size);
 
-        auto kvcache_in_slice =
-            make_tensor_slice(kvcache_in_tensor, m_kvcache_desc.dim, 0u, m_kvcache_desc.num_stored_tokens);
+        auto kvcache_in_slice = make_tensor_slice(kvcache_in_tensor, kv_dim, 0u, m_kvcache_desc.num_stored_tokens);
 
-        prefill_out_slice->copy_to(kvcache_in_slice._ptr);
+        if (kv_dim == 3u) {
+            copy_columns_by_row_chunks(prefill_out_slice, kvcache_in_slice);
+        } else {
+            prefill_out_slice->copy_to(kvcache_in_slice._ptr);
+        }
     }
+
     LOG_DEBUG("Prepare attention mask pattern.");
     auto* attention_mask_data = m_kvcache_request->get_tensor(m_kvcache_in_ports.at("attention_mask"))->data<int64_t>();
 
@@ -156,8 +194,11 @@
         const auto& output_name = kvcache_compiled->outputs()[kStartOutputKVCacheLayers + i].get_any_name();
         const auto& input_name = std::regex_replace(output_name, std::regex("present"), "past_key_values");
         auto kvcache_in_tensor = m_kvcache_request->get_tensor(m_kvcache_in_ports.at(input_name));
+        const auto& kv_dim = (output_name.find("value") != std::string::npos && m_kvcache_desc.v_tensors_transposed)
+                                 ? 3u
+                                 : m_kvcache_desc.dim;
         auto kvcache_in_slice = make_tensor_slice(kvcache_in_tensor,
-                                                  m_kvcache_desc.dim,
+                                                  kv_dim,
                                                   m_kvcache_desc.num_stored_tokens - 1,
                                                   m_kvcache_desc.num_stored_tokens);
         auto kvcache_out_tensor = m_kvcache_request->get_tensor(m_kvcache_out_ports.at(output_name));
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
index 5abe4b39fd44f2..0260fc9718c444 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -160,7 +160,8 @@ DQMatMulCWi::DQMatMulCWi(Context::Ref ctx) {
     auto qcoeff_shape = matched_node_qcoeff->output(0).get_shape();
 
     if ((ov::element::i4 == matched_qweight->get_element_type() ||
-         ov::element::i8 == matched_qweight->get_element_type()) &&
+         ov::element::i8 == matched_qweight->get_element_type() ||
+         ov::element::nf4 == matched_qweight->get_element_type()) &&
        (ov::op::util::is_parameter(matched_node_qcoeff) || ov::op::util::is_constant(matched_node_qcoeff)) &&
        qcoeff_shape[1] == 1 && !matched_matmul->get_transpose_a() && matched_matmul->get_transpose_b()) {
         auto matched_node_cvtw = node_to_output.at(qcvtw).get_node_shared_ptr();
diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/fake_convert.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/fake_convert.hpp
new file mode 100644
index 00000000000000..d22809e332b0a3
--- /dev/null
+++ b/src/tests/functional/plugin/shared/include/single_op_tests/fake_convert.hpp
@@ -0,0 +1,16 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "shared_test_classes/single_op/fake_convert.hpp"
+
+namespace ov {
+namespace test {
+
+TEST_P(FakeConvertLayerTest, Inference) {
+    run();
+}
+}  // namespace test
+}  // namespace ov
diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/fake_convert.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/fake_convert.hpp
new file mode 100644
index 00000000000000..ce6ad97aba1b5d
--- /dev/null
+++ 
b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/fake_convert.hpp
@@ -0,0 +1,28 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "shared_test_classes/base/ov_subgraph.hpp"
+
+namespace ov {
+namespace test {
+using FakeConvertParams = std::tuple<std::vector<InputShape>,  // Data shape
+                                     Shape,                    // Scale shape
+                                     Shape,                    // Shift shape
+                                     ov::element::Type,        // Input precision
+                                     ov::element::Type,        // Destination precision
+                                     bool,                     // Default shift
+                                     std::string>;             // Device name
+
+class FakeConvertLayerTest : public testing::WithParamInterface<FakeConvertParams>,
+                             virtual public ov::test::SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<FakeConvertParams>& obj);
+
+protected:
+    void SetUp() override;
+};
+}  // namespace test
+}  // namespace ov
diff --git a/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp b/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp
new file mode 100644
index 00000000000000..d207a8dabfb883
--- /dev/null
+++ b/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "shared_test_classes/single_op/fake_convert.hpp"
+
+#include "openvino/opsets/opset1.hpp"
+#include "openvino/opsets/opset13.hpp"
+
+namespace ov {
+namespace test {
+std::string FakeConvertLayerTest::getTestCaseName(const testing::TestParamInfo<FakeConvertParams>& obj) {
+    FakeConvertParams params = obj.param;
+
+    std::vector<InputShape> data_shapes;
+    Shape scale_shape, shift_shape;
+    element::Type_t data_prec, dst_prec;
+    bool default_shift;
+    std::string target_device;
+    std::tie(data_shapes, scale_shape, shift_shape, data_prec, dst_prec, default_shift, target_device) = params;
+
+    std::ostringstream result;
+    result << "IS=(";
+    for (const auto& shape : data_shapes) {
+        result << ov::test::utils::partialShape2str({shape.first}) << "_";
+    }
+    result << ")_TS=(";
+    for (const auto& shape : data_shapes) {
+        for (const auto& item : shape.second) {
+            result << ov::test::utils::vec2str(item) << "_";
+        }
+    }
+    result << ")_scaleShape=" << ov::test::utils::vec2str(scale_shape) << "_";
+    result << "shiftShape=" << ov::test::utils::vec2str(shift_shape) << "_";
+    result << "dataPrecision=" << element::Type(data_prec) << "_";
+    result << "destinationPrecision=" << element::Type(dst_prec) << "_";
+    if (default_shift)
+        result << "defaultShift=true";
+    else
+        result << "defaultShift=false";
+    return result.str();
+}
+
+void FakeConvertLayerTest::SetUp() {
+    FakeConvertParams params = this->GetParam();
+
+    std::vector<InputShape> data_shapes;
+    Shape scale_shape, shift_shape;
+    element::Type_t data_prec, dst_prec;
+    bool default_shift;
+    std::tie(data_shapes, scale_shape, shift_shape, data_prec, dst_prec, default_shift, targetDevice) = params;
+
+    init_input_shapes(data_shapes);
+
+    const auto data = std::make_shared<ov::opset1::Parameter>(data_prec, inputDynamicShapes.front());
+    const auto scale = std::make_shared<ov::opset1::Constant>(data_prec, scale_shape);
+    const auto shift = std::make_shared<ov::opset1::Constant>(data_prec, shift_shape);
+
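+    // Editorial note (not part of the original patch): FakeConvert-13 takes (data,
+    // scale) plus an optional shift input; the default_shift flag below selects the
+    // 3-input form, in which no shift is applied before the down-conversion.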
+    const auto fake_convert = default_shift ? std::make_shared<ov::opset13::FakeConvert>(data, scale, dst_prec)
+                                            : std::make_shared<ov::opset13::FakeConvert>(data, scale, shift, dst_prec);
+    function = std::make_shared<ov::Model>(NodeVector{fake_convert}, ParameterVector{data});
+}
+}  // namespace test
+}  // namespace ov
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 08b4308479ef03..de3ad80280d603 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -7,5 +7,5 @@ add_subdirectory(model_hub_tests)
 add_subdirectory(samples_tests)
 add_subdirectory(e2e_tests)
 
-install(FILES requirements_pytorch requirements_tensorflow requirements_onnx
+install(FILES requirements_pytorch requirements_tensorflow requirements_onnx requirements_jax
         DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL)
diff --git a/tests/constraints.txt b/tests/constraints.txt
index 4f46cd0cc8b2e9..c339ac3c65d56f 100644
--- a/tests/constraints.txt
+++ b/tests/constraints.txt
@@ -21,11 +21,8 @@ pytest>=5.0,<8.4
 pytest-dependency==0.5.1
 pytest-html==4.1.1
 pytest-timeout==2.3.1
-jax<=0.4.36
-jaxlib<=0.4.36
 kornia==0.7.0
 networkx<=3.3
-flax<=0.10.2
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch~=2.5.1; platform_system != "Darwin" or platform_machine != "x86_64"
diff --git a/tests/e2e_tests/requirements.txt b/tests/e2e_tests/requirements.txt
index 29e1c1cf31c558..a2056071e5417e 100644
--- a/tests/e2e_tests/requirements.txt
+++ b/tests/e2e_tests/requirements.txt
@@ -9,7 +9,7 @@ scipy>=1.5.4,<1.15
 opencv-python>=4.5; sys_platform != "darwin"
 opencv-python==4.8.1.78; sys_platform == "darwin"
 unittest-xml-reporting==3.0.4
-lpips==0.1.3
+lpips==0.1.4
 # for utils/e2e/comparator note: python 3.6 wheels is not available since 0.18
 # Add upper-bound due CVS-105039, CVS-105040
diff --git a/tests/layer_tests/onnx_tests/test_abs.py b/tests/layer_tests/onnx_tests/test_abs.py
index 9a82929ea35547..71e509faef3e65 100644
--- a/tests/layer_tests/onnx_tests/test_abs.py
+++ b/tests/layer_tests/onnx_tests/test_abs.py
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_and.py b/tests/layer_tests/onnx_tests/test_and.py
index ca5d21a42fe067..195ace1dadfa14 100644
--- a/tests/layer_tests/onnx_tests/test_and.py
+++ b/tests/layer_tests/onnx_tests/test_and.py
@@ -3,6 +3,8 @@
 
 import numpy as np
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_argmax.py b/tests/layer_tests/onnx_tests/test_argmax.py
index 604df5e7e69875..80d7568e9e8c4c 100644
--- a/tests/layer_tests/onnx_tests/test_argmax.py
+++ b/tests/layer_tests/onnx_tests/test_argmax.py
@@ -3,6 +3,8 @@
 
 import numpy as np
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_ceil.py b/tests/layer_tests/onnx_tests/test_ceil.py
index b7558630ac1c63..ea7ea10abbd31d 100644
--- a/tests/layer_tests/onnx_tests/test_ceil.py
+++ b/tests/layer_tests/onnx_tests/test_ceil.py
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from 
common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_clip.py b/tests/layer_tests/onnx_tests/test_clip.py
index dbce45193034d9..3cb3ba250a12e0 100644
--- a/tests/layer_tests/onnx_tests/test_clip.py
+++ b/tests/layer_tests/onnx_tests/test_clip.py
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_concat.py b/tests/layer_tests/onnx_tests/test_concat.py
index 8627f3b198dbd3..602b6a69644527 100644
--- a/tests/layer_tests/onnx_tests/test_concat.py
+++ b/tests/layer_tests/onnx_tests/test_concat.py
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_conv.py b/tests/layer_tests/onnx_tests/test_conv.py
index b7f9729141c33e..202d6af2915c67 100644
--- a/tests/layer_tests/onnx_tests/test_conv.py
+++ b/tests/layer_tests/onnx_tests/test_conv.py
@@ -3,6 +3,8 @@
 
 import numpy as np
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_cumsum.py b/tests/layer_tests/onnx_tests/test_cumsum.py
index 1e197de490d518..486b1f50835fb0 100644
--- a/tests/layer_tests/onnx_tests/test_cumsum.py
+++ b/tests/layer_tests/onnx_tests/test_cumsum.py
@@ -3,6 +3,8 @@
 
 import numpy as np
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_dequantize_linear.py b/tests/layer_tests/onnx_tests/test_dequantize_linear.py
index 9090f3a829919b..319030590a3f0d 100644
--- a/tests/layer_tests/onnx_tests/test_dequantize_linear.py
+++ b/tests/layer_tests/onnx_tests/test_dequantize_linear.py
@@ -3,6 +3,8 @@
 
 import numpy as np
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_elu.py b/tests/layer_tests/onnx_tests/test_elu.py
index dbffc32d09c6c7..9f0321ec9a6ee3 100644
--- a/tests/layer_tests/onnx_tests/test_elu.py
+++ b/tests/layer_tests/onnx_tests/test_elu.py
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
+pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")
+
 from common.layer_test_class import check_ir_version
 from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model
diff --git a/tests/layer_tests/onnx_tests/test_embedding_bag.py b/tests/layer_tests/onnx_tests/test_embedding_bag.py
index a18a59b9752f16..54d940c01fb36c 100644
--- a/tests/layer_tests/onnx_tests/test_embedding_bag.py
+++ b/tests/layer_tests/onnx_tests/test_embedding_bag.py
@@ -5,6 +5,8 @@
 
 import numpy as np
 import pytest
+pytest.importorskip("openvino.tools.mo", 
reason="Ticket - 157136") + import torch import torch.nn as nn from common.layer_test_class import CommonLayerTest, check_ir_version diff --git a/tests/layer_tests/onnx_tests/test_floor.py b/tests/layer_tests/onnx_tests/test_floor.py index 87ad058c510e8c..5076befc414941 100644 --- a/tests/layer_tests/onnx_tests/test_floor.py +++ b/tests/layer_tests/onnx_tests/test_floor.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_gather.py b/tests/layer_tests/onnx_tests/test_gather.py index a45d5b4f4a916b..9380de31c6dccc 100644 --- a/tests/layer_tests/onnx_tests/test_gather.py +++ b/tests/layer_tests/onnx_tests/test_gather.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_hard_sigmoid.py b/tests/layer_tests/onnx_tests/test_hard_sigmoid.py index 12986c590d41d4..a62ab2a7fc54e8 100644 --- a/tests/layer_tests/onnx_tests/test_hard_sigmoid.py +++ b/tests/layer_tests/onnx_tests/test_hard_sigmoid.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_identity.py b/tests/layer_tests/onnx_tests/test_identity.py index a86c0e2a687257..e58e272de49ec0 100644 --- a/tests/layer_tests/onnx_tests/test_identity.py +++ b/tests/layer_tests/onnx_tests/test_identity.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_leaky_relu.py b/tests/layer_tests/onnx_tests/test_leaky_relu.py index 3a12bfcd92c33e..cff9cd87b59d30 100644 --- a/tests/layer_tests/onnx_tests/test_leaky_relu.py +++ b/tests/layer_tests/onnx_tests/test_leaky_relu.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_log.py b/tests/layer_tests/onnx_tests/test_log.py index db0a329aa09746..53e2c42505bf7b 100644 --- a/tests/layer_tests/onnx_tests/test_log.py +++ b/tests/layer_tests/onnx_tests/test_log.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_logsoftmax.py b/tests/layer_tests/onnx_tests/test_logsoftmax.py index a81b20402d50dd..057376d6ed48b2 100644 --- a/tests/layer_tests/onnx_tests/test_logsoftmax.py +++ b/tests/layer_tests/onnx_tests/test_logsoftmax.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", 
reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_lrn.py b/tests/layer_tests/onnx_tests/test_lrn.py index 0e8f34129a300f..1c1cf62d5d12b4 100644 --- a/tests/layer_tests/onnx_tests/test_lrn.py +++ b/tests/layer_tests/onnx_tests/test_lrn.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_neg.py b/tests/layer_tests/onnx_tests/test_neg.py index d19991cb8a6b12..98f6acd728f637 100644 --- a/tests/layer_tests/onnx_tests/test_neg.py +++ b/tests/layer_tests/onnx_tests/test_neg.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_non_zero.py b/tests/layer_tests/onnx_tests/test_non_zero.py index 464304651a2a19..a2035b4ab27d63 100644 --- a/tests/layer_tests/onnx_tests/test_non_zero.py +++ b/tests/layer_tests/onnx_tests/test_non_zero.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_not.py b/tests/layer_tests/onnx_tests/test_not.py index 05a6c7ffbb2e2d..1caf8e2e7a770c 100644 --- a/tests/layer_tests/onnx_tests/test_not.py +++ b/tests/layer_tests/onnx_tests/test_not.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_or.py b/tests/layer_tests/onnx_tests/test_or.py index 285c90765d6a7e..6db35aff2f500e 100644 --- a/tests/layer_tests/onnx_tests/test_or.py +++ b/tests/layer_tests/onnx_tests/test_or.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_pad.py b/tests/layer_tests/onnx_tests/test_pad.py index abacc530d93144..161db0685b6fa8 100644 --- a/tests/layer_tests/onnx_tests/test_pad.py +++ b/tests/layer_tests/onnx_tests/test_pad.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_pooling.py b/tests/layer_tests/onnx_tests/test_pooling.py index 85e7fc883fc5d8..2bc2251f8aea49 100644 --- a/tests/layer_tests/onnx_tests/test_pooling.py +++ b/tests/layer_tests/onnx_tests/test_pooling.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version 
from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_prelu.py b/tests/layer_tests/onnx_tests/test_prelu.py index f20e89b7006a44..59a1e8f4f415e1 100644 --- a/tests/layer_tests/onnx_tests/test_prelu.py +++ b/tests/layer_tests/onnx_tests/test_prelu.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_reduce.py b/tests/layer_tests/onnx_tests/test_reduce.py index 58141e18260016..46b4008c4e653d 100644 --- a/tests/layer_tests/onnx_tests/test_reduce.py +++ b/tests/layer_tests/onnx_tests/test_reduce.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_reduce_lp.py b/tests/layer_tests/onnx_tests/test_reduce_lp.py index 2ff4511ef87443..3cf2f5e133b895 100644 --- a/tests/layer_tests/onnx_tests/test_reduce_lp.py +++ b/tests/layer_tests/onnx_tests/test_reduce_lp.py @@ -5,6 +5,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_relu.py b/tests/layer_tests/onnx_tests/test_relu.py index ce597920923289..520749ed948b25 100644 --- a/tests/layer_tests/onnx_tests/test_relu.py +++ b/tests/layer_tests/onnx_tests/test_relu.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_reshape.py b/tests/layer_tests/onnx_tests/test_reshape.py index 637beeb4388bbb..28eb339af52f9e 100644 --- a/tests/layer_tests/onnx_tests/test_reshape.py +++ b/tests/layer_tests/onnx_tests/test_reshape.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_resize.py b/tests/layer_tests/onnx_tests/test_resize.py index 4d28afdb50fe38..36a808fa859ef1 100644 --- a/tests/layer_tests/onnx_tests/test_resize.py +++ b/tests/layer_tests/onnx_tests/test_resize.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_roi_align.py b/tests/layer_tests/onnx_tests/test_roi_align.py index 4cd49c50c20bf8..d5cedf4e1a0f06 100644 --- a/tests/layer_tests/onnx_tests/test_roi_align.py +++ b/tests/layer_tests/onnx_tests/test_roi_align.py @@ -5,6 +5,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import 
check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model from unit_tests.utils.graph import build_graph diff --git a/tests/layer_tests/onnx_tests/test_scatter.py b/tests/layer_tests/onnx_tests/test_scatter.py index 578300e144bc3d..baaa0392553fbf 100644 --- a/tests/layer_tests/onnx_tests/test_scatter.py +++ b/tests/layer_tests/onnx_tests/test_scatter.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_sigmoid.py b/tests/layer_tests/onnx_tests/test_sigmoid.py index 5dcb3e8f1b112a..db055a6d9030ac 100644 --- a/tests/layer_tests/onnx_tests/test_sigmoid.py +++ b/tests/layer_tests/onnx_tests/test_sigmoid.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_sign.py b/tests/layer_tests/onnx_tests/test_sign.py index 07f4f169a7bc1b..70c0ffcc0033ec 100644 --- a/tests/layer_tests/onnx_tests/test_sign.py +++ b/tests/layer_tests/onnx_tests/test_sign.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_softmax.py b/tests/layer_tests/onnx_tests/test_softmax.py index c4d9d600276402..390b1a894549c3 100644 --- a/tests/layer_tests/onnx_tests/test_softmax.py +++ b/tests/layer_tests/onnx_tests/test_softmax.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_softplus.py b/tests/layer_tests/onnx_tests/test_softplus.py index cdcbbbf3e8ed13..b0127c0dcf0624 100644 --- a/tests/layer_tests/onnx_tests/test_softplus.py +++ b/tests/layer_tests/onnx_tests/test_softplus.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_softsign.py b/tests/layer_tests/onnx_tests/test_softsign.py index 30ca27402c7878..75043b57b80dc7 100644 --- a/tests/layer_tests/onnx_tests/test_softsign.py +++ b/tests/layer_tests/onnx_tests/test_softsign.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_sqrt.py b/tests/layer_tests/onnx_tests/test_sqrt.py index 9c4733a68cd9fa..24dbbcac659df4 100644 --- a/tests/layer_tests/onnx_tests/test_sqrt.py +++ b/tests/layer_tests/onnx_tests/test_sqrt.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 
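Each of the hunks above inserts the same module-level guard. A minimal sketch of its effect, assuming only stock pytest (the reason string is the ticket reference carried verbatim by the diff):

```python
# Minimal sketch of the guard added above, assuming stock pytest.
import pytest

# If openvino.tools.mo cannot be imported, pytest.importorskip skips the
# whole test module with the given reason instead of failing at collection.
mo = pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136")

# Code below this point only runs when the legacy Model Optimizer is present.
```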
157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_trigonometry.py b/tests/layer_tests/onnx_tests/test_trigonometry.py index 563b63b1e5632d..99651091ea2e96 100644 --- a/tests/layer_tests/onnx_tests/test_trigonometry.py +++ b/tests/layer_tests/onnx_tests/test_trigonometry.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_where.py b/tests/layer_tests/onnx_tests/test_where.py index fb358a2ced8415..1bf845340b3922 100644 --- a/tests/layer_tests/onnx_tests/test_where.py +++ b/tests/layer_tests/onnx_tests/test_where.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/onnx_tests/test_xor.py b/tests/layer_tests/onnx_tests/test_xor.py index 2790a31784ff59..e7f0c11f8362a2 100644 --- a/tests/layer_tests/onnx_tests/test_xor.py +++ b/tests/layer_tests/onnx_tests/test_xor.py @@ -3,6 +3,8 @@ import numpy as np import pytest +pytest.importorskip("openvino.tools.mo", reason="Ticket - 157136") + from common.layer_test_class import check_ir_version from common.onnx_layer_test_class import OnnxRuntimeLayerTest, onnx_make_model diff --git a/tests/layer_tests/requirements.txt b/tests/layer_tests/requirements.txt index 04889ebce10a39..2ba12cc5e2bece 100644 --- a/tests/layer_tests/requirements.txt +++ b/tests/layer_tests/requirements.txt @@ -16,5 +16,3 @@ pytest defusedxml tensorflow tensorflow-addons; python_version <= '3.10' -jax; sys_platform == "linux" and platform_machine == "x86_64" # https://jax.readthedocs.io/en/latest/installation.html#pip-installation-cpu - wheels are for "x86_64" only -jaxlib; sys_platform == "linux" and platform_machine == "x86_64" # https://jax.readthedocs.io/en/latest/installation.html#pip-installation-cpu - wheels are for "x86_64" only diff --git a/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py b/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py index 4ff4d589cbae32..5c1037e38cfc84 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py @@ -67,4 +67,4 @@ def test_unary_ops(self, input_shape, input_type, op_type, pytest.skip("159585: accuracy error on ARM") self._test(*self.create_unary_net(input_shape, input_type, op_type), ie_device, precision, ir_version, temp_dir=temp_dir, - use_legacy_frontend=use_legacy_frontend, custom_eps=1e-3) + use_legacy_frontend=use_legacy_frontend, custom_eps=3 * 1e-3) diff --git a/tests/model_hub_tests/jax/requirements.txt b/tests/model_hub_tests/jax/requirements.txt deleted file mode 100644 index 328084ac050ca6..00000000000000 --- a/tests/model_hub_tests/jax/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ --c ../../constraints.txt -numpy -pytest -pytest-html -transformers -requests -jax -jaxlib -flax -pillow \ No newline at end of file diff --git a/tests/requirements_jax b/tests/requirements_jax new file mode 100644 index 00000000000000..c392df4359bee3 --- /dev/null +++ b/tests/requirements_jax @@ -0,0 
diff --git a/tests/model_hub_tests/jax/requirements.txt b/tests/model_hub_tests/jax/requirements.txt
deleted file mode 100644
index 328084ac050ca6..00000000000000
--- a/tests/model_hub_tests/jax/requirements.txt
+++ /dev/null
@@ -1,10 +0,0 @@
--c ../../constraints.txt
-numpy
-pytest
-pytest-html
-transformers
-requests
-jax
-jaxlib
-flax
-pillow
\ No newline at end of file
diff --git a/tests/requirements_jax b/tests/requirements_jax
new file mode 100644
index 00000000000000..c392df4359bee3
--- /dev/null
+++ b/tests/requirements_jax
@@ -0,0 +1,13 @@
+numpy==1.26.4; python_version < "3.12" or platform_system == "Darwin" and platform_machine == "x86_64"
+numpy==2.2.1; python_version >= "3.12" and (platform_system != "Darwin" or platform_machine != "x86_64")
+pytest==7.0.1
+pytest-xdist[psutil]==3.6.1
+pytest-html==4.1.1
+jax==0.4.38; (platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9"
+# tensorflow 2.16.2 depends on ml-dtypes~=0.3.1 and jax 0.4.35 depends on ml-dtypes>=0.4.0
+jax==0.4.33; (platform_system == "Darwin" and platform_machine == "x86_64") and python_version > "3.9"
+jax==0.4.30; python_version <= "3.9"
+flax==0.10.2
+transformers==4.47.1
+defusedxml
+pillow
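The pins in the new tests/requirements_jax select packages with PEP 508 environment markers. A quick way to check which jax pin applies on a given interpreter, using the packaging library (an illustration only, not part of the diff):

```python
# Evaluate the PEP 508 markers from tests/requirements_jax on the current
# interpreter; 'packaging' ships as a pip dependency, so it is broadly available.
from packaging.markers import Marker

linux_marker = Marker('(platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9"')
darwin_marker = Marker('(platform_system == "Darwin" and platform_machine == "x86_64") and python_version > "3.9"')

print("jax==0.4.38 applies:", linux_marker.evaluate())
print("jax==0.4.33 applies:", darwin_marker.evaluate())
```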
diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch
index f42deb81839883..33907145f7de4b 100644
--- a/tests/requirements_pytorch
+++ b/tests/requirements_pytorch
@@ -14,7 +14,8 @@ torchaudio==2.2.2; platform_system == "Darwin" and platform_machine == "x86_64"
 # transformers 4.45.1 is available
 # but optimum still requires <4.45.0
 transformers==4.44.2
-pytest==7.0.1
+pytest==7.0.1; python_version < '3.10'
+pytest==7.2.0; python_version >= '3.10'
 pytest-html==4.1.1
 pytest-xdist[psutil]==3.6.1
 defusedxml==0.7.1
diff --git a/tests/requirements_tensorflow b/tests/requirements_tensorflow
index 5369b0135f7618..5d699facad1c91 100644
--- a/tests/requirements_tensorflow
+++ b/tests/requirements_tensorflow
@@ -4,7 +4,8 @@
 # tensorflow 2.16.2 depends on numpy<2.0.0 and >=1.26.0; python_version >= "3.12"
 numpy==1.26.4; python_version < "3.12" or platform_system == "Darwin" and platform_machine == "x86_64"
 numpy==2.0.2; python_version >= "3.12" and (platform_system != "Darwin" or platform_machine != "x86_64")
-pytest==7.0.1
+pytest==7.0.1; python_version < '3.10'
+pytest==7.2.0; python_version >= '3.10'
 pytest-xdist[psutil]==3.6.1
 pytest-html==4.1.1
 transformers==4.45.1
@@ -17,7 +18,7 @@ wrapt==1.15.0; python_version >= "3.12"
 # tensorflow-text is not available for both Windows and ARM platforms
 tensorflow-text==2.18.0; python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
 tensorflow-hub==0.16.1
-jax==0.4.35; (platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9"
+jax==0.4.38; (platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9"
 # tensorflow 2.16.2 depends on ml-dtypes~=0.3.1 and jax 0.4.35 depends on ml-dtypes>=0.4.0
 jax==0.4.33; (platform_system == "Darwin" and platform_machine == "x86_64") and python_version > "3.9"
 jax==0.4.30; python_version <= "3.9"
diff --git a/tools/benchmark_tool/openvino/__init__.py b/tools/benchmark_tool/openvino/__init__.py
index 7643f742e0067d..69c678909b1c9e 100644
--- a/tools/benchmark_tool/openvino/__init__.py
+++ b/tools/benchmark_tool/openvino/__init__.py
@@ -7,7 +7,7 @@
 # Required for Windows OS platforms
 # Note: always top-level
 try:
-    from openvino.package_utils import _add_openvino_libs_to_search_path
+    from openvino.utils import _add_openvino_libs_to_search_path
     _add_openvino_libs_to_search_path()
 except ImportError:
     pass
@@ -17,47 +17,6 @@
 # # This __init__.py forces checking of runtime modules to propagate errors.
 # # It is not compared with init files from openvino-dev package.
 # #
-
-# Openvino pybind bindings
-from openvino._pyopenvino import AxisSet
-from openvino._pyopenvino import AxisVector
-from openvino._pyopenvino import ConstOutput
-from openvino._pyopenvino import Coordinate
-from openvino._pyopenvino import CoordinateDiff
-from openvino._pyopenvino import DiscreteTypeInfo
-from openvino._pyopenvino import Extension
-from openvino._pyopenvino import ProfilingInfo
-from openvino._pyopenvino import RTMap
-from openvino._pyopenvino import Version
-from openvino._pyopenvino import Symbol
-from openvino._pyopenvino import Dimension
-from openvino._pyopenvino import Input
-from openvino._pyopenvino import Output
-from openvino._pyopenvino import Node
-from openvino._pyopenvino import Strides
-from openvino._pyopenvino import PartialShape
-from openvino._pyopenvino import Shape
-from openvino._pyopenvino import Layout
-from openvino._pyopenvino import Type
-from openvino._pyopenvino import Tensor
-from openvino._pyopenvino import OVAny
-from openvino._pyopenvino import get_batch
-from openvino._pyopenvino import set_batch
-from openvino._pyopenvino import serialize
-from openvino._pyopenvino import shutdown
-from openvino._pyopenvino import save_model
-from openvino._pyopenvino import layout_helpers
-from openvino._pyopenvino import RemoteContext
-from openvino._pyopenvino import RemoteTensor
-from openvino._pyopenvino import Op
-
-# Import public classes from _ov_api
-from openvino._ov_api import Model
-from openvino._ov_api import Core
-from openvino._ov_api import CompiledModel
-from openvino._ov_api import InferRequest
-from openvino._ov_api import AsyncInferQueue
-
 # Import all public modules
 from openvino import runtime as runtime
 from openvino import frontend as frontend
@@ -67,10 +26,36 @@
 from openvino import utils as utils
 from openvino import properties as properties
 
+# Import most important classes and functions from openvino.runtime
+from openvino._ov_api import Model
+from openvino._ov_api import Core
+from openvino._ov_api import CompiledModel
+from openvino._ov_api import InferRequest
+from openvino._ov_api import AsyncInferQueue
+
+from openvino.runtime import Symbol
+from openvino.runtime import Dimension
+from openvino.runtime import Strides
+from openvino.runtime import PartialShape
+from openvino.runtime import Shape
+from openvino.runtime import Layout
+from openvino.runtime import Type
+from openvino.runtime import Tensor
+from openvino.runtime import OVAny
+
 # Helper functions for openvino module
-from openvino.utils.data_helpers import tensor_from_file
+from openvino.runtime.utils.data_helpers import tensor_from_file
 from openvino._ov_api import compile_model
+from openvino.runtime import get_batch
+from openvino.runtime import set_batch
+from openvino.runtime import serialize
+from openvino.runtime import shutdown
+from openvino.runtime import save_model
+from openvino.runtime import layout_helpers
+from openvino._pyopenvino import RemoteContext
+from openvino._pyopenvino import RemoteTensor
+from openvino._pyopenvino import Op
 
 # Import opsets
 from openvino import opset1
@@ -95,7 +80,7 @@
 from openvino._pyopenvino import VASurfaceTensor
 
 # Set version for openvino package
-from openvino._pyopenvino import get_version
+from openvino.runtime import get_version
 __version__ = get_version()
 
 # Tools
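After this rewrite of tools/benchmark_tool/openvino/__init__.py, the core classes reach the top-level namespace through openvino._ov_api and openvino.runtime re-exports rather than directly from openvino._pyopenvino, so user-facing imports stay unchanged. A small usage sketch under that assumption:

```python
# Usage stays the same after the re-export shuffle: everything below is
# resolved through the top-level openvino package.
import openvino as ov

print(ov.get_version())                     # now re-exported via openvino.runtime
core = ov.Core()                            # still provided by openvino._ov_api
shape = ov.PartialShape([1, 3, 224, 224])   # re-exported via openvino.runtime
```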
diff --git a/tools/ovc/openvino/__init__.py b/tools/ovc/openvino/__init__.py
index 7643f742e0067d..69c678909b1c9e 100644
--- a/tools/ovc/openvino/__init__.py
+++ b/tools/ovc/openvino/__init__.py
@@ -7,7 +7,7 @@
 # Required for Windows OS platforms
 # Note: always top-level
 try:
-    from openvino.package_utils import _add_openvino_libs_to_search_path
+    from openvino.utils import _add_openvino_libs_to_search_path
     _add_openvino_libs_to_search_path()
 except ImportError:
     pass
@@ -17,47 +17,6 @@
 # # This __init__.py forces checking of runtime modules to propagate errors.
 # # It is not compared with init files from openvino-dev package.
 # #
-
-# Openvino pybind bindings
-from openvino._pyopenvino import AxisSet
-from openvino._pyopenvino import AxisVector
-from openvino._pyopenvino import ConstOutput
-from openvino._pyopenvino import Coordinate
-from openvino._pyopenvino import CoordinateDiff
-from openvino._pyopenvino import DiscreteTypeInfo
-from openvino._pyopenvino import Extension
-from openvino._pyopenvino import ProfilingInfo
-from openvino._pyopenvino import RTMap
-from openvino._pyopenvino import Version
-from openvino._pyopenvino import Symbol
-from openvino._pyopenvino import Dimension
-from openvino._pyopenvino import Input
-from openvino._pyopenvino import Output
-from openvino._pyopenvino import Node
-from openvino._pyopenvino import Strides
-from openvino._pyopenvino import PartialShape
-from openvino._pyopenvino import Shape
-from openvino._pyopenvino import Layout
-from openvino._pyopenvino import Type
-from openvino._pyopenvino import Tensor
-from openvino._pyopenvino import OVAny
-from openvino._pyopenvino import get_batch
-from openvino._pyopenvino import set_batch
-from openvino._pyopenvino import serialize
-from openvino._pyopenvino import shutdown
-from openvino._pyopenvino import save_model
-from openvino._pyopenvino import layout_helpers
-from openvino._pyopenvino import RemoteContext
-from openvino._pyopenvino import RemoteTensor
-from openvino._pyopenvino import Op
-
-# Import public classes from _ov_api
-from openvino._ov_api import Model
-from openvino._ov_api import Core
-from openvino._ov_api import CompiledModel
-from openvino._ov_api import InferRequest
-from openvino._ov_api import AsyncInferQueue
-
 # Import all public modules
 from openvino import runtime as runtime
 from openvino import frontend as frontend
@@ -67,10 +26,36 @@
 from openvino import utils as utils
 from openvino import properties as properties
 
+# Import most important classes and functions from openvino.runtime
+from openvino._ov_api import Model
+from openvino._ov_api import Core
+from openvino._ov_api import CompiledModel
+from openvino._ov_api import InferRequest
+from openvino._ov_api import AsyncInferQueue
+
+from openvino.runtime import Symbol
+from openvino.runtime import Dimension
+from openvino.runtime import Strides
+from openvino.runtime import PartialShape
+from openvino.runtime import Shape
+from openvino.runtime import Layout
+from openvino.runtime import Type
+from openvino.runtime import Tensor
+from openvino.runtime import OVAny
+
 # Helper functions for openvino module
-from openvino.utils.data_helpers import tensor_from_file
+from openvino.runtime.utils.data_helpers import tensor_from_file
 from openvino._ov_api import compile_model
+from openvino.runtime import get_batch
+from openvino.runtime import set_batch
+from openvino.runtime import serialize
+from openvino.runtime import shutdown
+from openvino.runtime import save_model
+from openvino.runtime import layout_helpers
+from openvino._pyopenvino import RemoteContext
+from openvino._pyopenvino import RemoteTensor
+from openvino._pyopenvino import Op
 
 # Import opsets
 from openvino import opset1
@@ -95,7 +80,7 @@
 from openvino._pyopenvino import VASurfaceTensor
 
 # Set version for openvino package
-from openvino._pyopenvino import get_version
+from openvino.runtime import get_version
 __version__ = get_version()
 
 # Tools