diff --git a/.github/workflows/build_linux_jax_wheels.yml b/.github/workflows/build_linux_jax_wheels.yml new file mode 100644 index 0000000000000..8b4f18ae5d9a7 --- /dev/null +++ b/.github/workflows/build_linux_jax_wheels.yml @@ -0,0 +1,290 @@ +name: Build Portable Linux JAX Wheels + +on: + workflow_call: + inputs: + amdgpu_family: + required: true + type: string + python_version: + required: true + type: string + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + required: true + type: string + s3_subdir: + description: S3 subdirectory, not including the GPU-family + required: true + type: string + s3_staging_subdir: + description: S3 staging subdirectory, not including the GPU-family + required: true + type: string + rocm_version: + description: ROCm version to install + type: string + tar_url: + description: URL to TheRock tarball to build against + type: string + cloudfront_url: + description: CloudFront URL pointing to Python index + required: true + type: string + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + required: true + type: string + repository: + description: "Repository to checkout. Defaults to `ROCm/TheRock`." + type: string + default: "ROCm/TheRock" + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." + type: string + workflow_dispatch: + inputs: + amdgpu_family: + type: choice + options: + - gfx101X-dgpu + - gfx103X-dgpu + - gfx110X-all + - gfx1150 + - gfx1151 + - gfx120X-all + - gfx90X-dcgpu + - gfx94X-dcgpu + - gfx950-dcgpu + default: gfx94X-dcgpu + python_version: + required: true + type: string + default: "3.12" + release_type: + type: choice + description: Type of release to create. All developer-triggered jobs should use "dev"! 
+ options: + - dev + - nightly + - prerelease + default: dev + s3_subdir: + description: S3 subdirectory, not including the GPU-family + type: string + default: "v2" + s3_staging_subdir: + description: S3 staging subdirectory, not including the GPU-family + type: string + default: "v2-staging" + rocm_version: + description: ROCm version to install + type: string + tar_url: + description: URL to TheRock tarball to build against + type: string + cloudfront_url: + description: CloudFront base URL pointing to Python index + type: string + default: "https://rocm.devreleases.amd.com/v2" + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + type: string + default: "https://rocm.devreleases.amd.com/v2-staging" + jax_ref: + description: rocm-jax repository ref/branch to check out + type: string + default: rocm-jaxlib-v0.8.0 + +permissions: + id-token: write + contents: read + +run-name: Build Linux JAX Wheels (${{ inputs.amdgpu_family }}, ${{ inputs.python_version }}, ${{ inputs.release_type }}) + +jobs: + build_jax_wheels: + strategy: + matrix: + jax_ref: [rocm-jaxlib-v0.8.0] + name: Build Linux JAX Wheels | ${{ inputs.amdgpu_family }} | Python ${{ inputs.python_version }} + runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-linux-scale-rocm' || 'ubuntu-24.04' }} + env: + PACKAGE_DIST_DIR: ${{ github.workspace }}/jax/jax_rocm_plugin/wheelhouse + S3_BUCKET_PY: "therock-${{ inputs.release_type }}-python" + outputs: + cp_version: ${{ env.cp_version }} + jax_version: ${{ steps.extract_jax_version.outputs.jax_version }} + steps: + - name: Checkout TheRock + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Checkout JAX + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + path: jax + repository: rocm/rocm-jax + ref: ${{ matrix.jax_ref }} + + - name: Configure Git Identity + run: | + git config --global user.name "therockbot" + git config --global user.email 
"therockbot@amd.com" + + - name: "Setting up Python" + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ inputs.python_version }} + + - name: Select Python version + run: | + python build_tools/github_actions/python_to_cp_version.py \ + --python-version ${{ inputs.python_version }} + + - name: Build JAX Wheels + env: + ROCM_VERSION: ${{ inputs.rocm_version }} + run: | + ls -lah + pushd jax + python3 build/ci_build \ + --compiler=clang \ + --python-versions="${{ inputs.python_version }}" \ + --rocm-version="${ROCM_VERSION}" \ + --therock-path="${{ inputs.tar_url }}" \ + dist_wheels + + - name: Extract JAX version + id: extract_jax_version + run: | + # Extract JAX version from requirements.txt (e.g., "jax==0.8.0") + # Remove all whitespace from requirements.txt to simplify parsing + # Search for lines starting with "jax==" or "jaxlib==" followed by version (excluding comments) + # Extract the version number by splitting on '=' and taking the 3rd field + # [^#]+ matches one or more characters that are NOT '#', ensuring we stop before any inline comments + JAX_VERSION=$(tr -d ' ' < jax/build/requirements.txt \ + | grep -E '^(jax|jaxlib)==[^#]+' | head -n1 | cut -d'=' -f3) + echo "jax_version=$JAX_VERSION" >> "$GITHUB_OUTPUT" + + - name: Install AWS CLI + if: always() + run: bash ./dockerfiles/install_awscli.sh + + - name: Configure AWS Credentials + if: always() + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-${{ inputs.release_type }}-releases + + - name: Upload wheels to S3 + if: ${{ github.repository_owner == 'ROCm' }} + run: | + aws s3 cp ${{ env.PACKAGE_DIST_DIR }}/ s3://${{ env.S3_BUCKET_PY }}/${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}/ \ + --recursive --exclude "*" --include "*.whl" + + - name: (Re-)Generate Python package release index + if: ${{ 
github.repository_owner == 'ROCm' }} + run: | + python3 -m venv .venv + source .venv/bin/activate + pip3 install boto3 packaging + python3 ./build_tools/third_party/s3_management/manage.py ${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }} + + generate_target_to_run: + name: Generate target_to_run + runs-on: ubuntu-24.04 + outputs: + test_runs_on: ${{ steps.configure.outputs.test-runs-on }} + bypass_tests_for_releases: ${{ steps.configure.outputs.bypass_tests_for_releases }} + steps: + - name: Checking out repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Generating target to run + id: configure + env: + TARGET: ${{ inputs.amdgpu_family }} + PLATFORM: "linux" + # Variable comes from ROCm organization variable 'ROCM_THEROCK_TEST_RUNNERS' + ROCM_THEROCK_TEST_RUNNERS: ${{ vars.ROCM_THEROCK_TEST_RUNNERS }} + LOAD_TEST_RUNNERS_FROM_VAR: false + run: python ./build_tools/github_actions/configure_target_run.py + + test_jax_wheels: + name: Test JAX wheels | ${{ inputs.amdgpu_family }} | ${{ needs.generate_target_to_run.outputs.test_runs_on }} + needs: [build_jax_wheels, generate_target_to_run] + permissions: + contents: read + packages: read + uses: ./.github/workflows/test_linux_jax_wheels.yml + with: + amdgpu_family: ${{ inputs.amdgpu_family }} + release_type: ${{ inputs.release_type }} + s3_subdir: ${{ inputs.s3_subdir }} + package_index_url: ${{ inputs.cloudfront_staging_url }} + rocm_version: ${{ inputs.rocm_version }} + tar_url: ${{ inputs.tar_url }} + python_version: ${{ inputs.python_version }} + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + jax_ref: ${{ inputs.jax_ref }} + test_runs_on: ${{ needs.generate_target_to_run.outputs.test_runs_on }} + + upload_jax_wheels: + name: Release JAX Wheels to S3 + needs: [build_jax_wheels, generate_target_to_run, test_jax_wheels] + 
if: ${{ !cancelled() }} + runs-on: ubuntu-24.04 + env: + S3_BUCKET_PY: "therock-${{ inputs.release_type }}-python" + JAX_VERSION: "${{ needs.build_jax_wheels.outputs.jax_version }}" + ROCM_VERSION: "${{ inputs.rocm_version }}" + CP_VERSION: "${{ needs.build_jax_wheels.outputs.cp_version }}" + + steps: + - name: Checkout + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Configure AWS Credentials + if: always() + uses: aws-actions/configure-aws-credentials@00943011d9042930efac3dcd3a170e4273319bc8 # v5.1.0 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-${{ inputs.release_type }}-releases + + - name: Determine upload flag + env: + BUILD_RESULT: ${{ needs.build_jax_wheels.result }} + TEST_RESULT: ${{ needs.test_jax_wheels.result }} + TEST_RUNS_ON: ${{ needs.generate_target_to_run.outputs.test_runs_on }} + BYPASS_TESTS_FOR_RELEASES: ${{ needs.generate_target_to_run.outputs.bypass_tests_for_releases }} + run: python ./build_tools/github_actions/promote_wheels_based_on_policy.py + + - name: Copy JAX wheels from staging to release S3 + if: ${{ env.upload == 'true' }} + run: | + echo "Copying exact tested wheels to release S3 bucket..." 
+ aws s3 cp \ + s3://${S3_BUCKET_PY}/${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}/ \ + s3://${S3_BUCKET_PY}/${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}/ \ + --recursive \ + --exclude "*" \ + --include "jaxlib-${JAX_VERSION}+rocm${ROCM_VERSION}-${CP_VERSION}-manylinux_2_27_x86_64.whl" \ + --include "jax_rocm7_plugin-${JAX_VERSION}+rocm${ROCM_VERSION}-${CP_VERSION}-manylinux_2_28_x86_64.whl" \ + --include "jax_rocm7_pjrt-${JAX_VERSION}+rocm${ROCM_VERSION}-py3-none-manylinux_2_28_x86_64.whl" + + - name: (Re-)Generate Python package release index + if: ${{ env.upload == 'true' }} + env: + # Environment variables to be set for `manage.py` + CUSTOM_PREFIX: "${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}" + run: | + pip install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${{ env.CUSTOM_PREFIX }} diff --git a/.github/workflows/build_native_linux_packages.yml b/.github/workflows/build_native_linux_packages.yml new file mode 100644 index 0000000000000..ead640630e25c --- /dev/null +++ b/.github/workflows/build_native_linux_packages.yml @@ -0,0 +1,135 @@ +name: Build Native Linux Packages + +on: + workflow_call: + inputs: + artifact_group: + description: gfx arch group for the s3 server + type: string + default: gfx94X-dcgpu + artifact_run_id: + description: workflow run id to download the artifacts from. + required: true + type: string + rocm_version: + description: ROCm version to append to the package (8.0.0, 8.0.1rc1, ...). + required: true + type: string + native_package_type: + description: Specify whether debian or rpm packages are needed (deb or rpm). + required: true + type: string + package_suffix: + description: The suffix to be added to package name (asan, static or rpath). + required: false + type: string + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! 
+ required: false + type: string + workflow_dispatch: + inputs: + artifact_group: + type: string + default: gfx94X-dcgpu + artifact_run_id: + description: workflow run id to download the artifacts from + type: string + rocm_version: + description: ROCm version to append to the package (8.0.0, 8.0.1rc1, ...). + type: string + default: "0.0.1" + native_package_type: + description: Specify whether debian or rpm packages are needed (deb or rpm). + required: true + type: choice + options: + - rpm + - deb + default: "rpm" + package_suffix: + description: The suffix to be added to package name (asan, static or rpath). + type: string + required: false + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + type: string + default: "dev" + +permissions: + id-token: write + contents: read + +run-name: Build native Linux packages (${{ inputs.artifact_group }}, ${{ inputs.rocm_version }}, ${{ inputs.native_package_type }}, ${{ inputs.package_suffix }}, ${{ inputs.release_type }}) + +jobs: + build_native_packages: + name: Build Linux native Packages + strategy: + fail-fast: false + runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-linux-scale-rocm' || 'ubuntu-24.04' }} + env: + BUILD_IMAGE: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id || github.run_id }} + PACKAGE_SUFFIX: ${{ inputs.package_suffix != '' && inputs.package_suffix || '' }} + OUTPUT_DIR: ${{ github.workspace }}/output + ARTIFACTS_DIR: ${{ github.workspace }}/output/artifacts + PACKAGE_DIST_DIR: ${{ github.workspace }}/output/packages + RELEASE_TYPE: ${{ inputs.release_type || '' }} + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: '3.12' + - name: Install Python 
requirements + run: | + pip install pyelftools boto3 jinja2 + + - name: Install System requirements + run: | + # Install the needed tools for creating rpm / deb packages + # Also install tools for creating repo files + sudo apt update + sudo apt install -y llvm + sudo apt install -y rpm debhelper-compat build-essential + sudo apt install -y dpkg-dev createrepo-c + + - name: Fetch Artifacts + run: | + echo "Fetching artifacts for build ${{ inputs.artifact_run_id }}" + python ./build_tools/fetch_artifacts.py \ + --run-id=${{ env.ARTIFACT_RUN_ID }} \ + --run-github-repo="ROCm/TheRock" \ + --artifact-group=${{ inputs.artifact_group }} \ + --output-dir=${{ env.ARTIFACTS_DIR }} + + - name: Build Packages + id: build-packages + run: | + echo "Building ${{ inputs.native_package_type }} packages for ${{ inputs.artifact_group }} ${{ inputs.artifact_run_id }}" + python ./build_tools/packaging/linux/build_package.py \ + --dest-dir ${{ env.PACKAGE_DIST_DIR }} \ + --rocm-version ${{ inputs.rocm_version }} \ + --target ${{ inputs.artifact_group }} \ + --artifacts-dir ${{ env.ARTIFACTS_DIR }} \ + --pkg-type ${{ inputs.native_package_type }} \ + --version-suffix ${{ env.ARTIFACT_RUN_ID }} + + - name: Install AWS CLI + run: bash ./dockerfiles/install_awscli.sh + + - name: Configure AWS Credentials for non-forked repos + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-artifacts-external + + - name: Upload Package repo to S3 + id: upload-packages + run: | + echo "Uploading to s3 bucket" + python ./build_tools/packaging/linux/upload_package_repo.py \ + --pkg-type ${{ inputs.native_package_type }} \ + --s3-bucket therock-deb-rpm-test \ + --amdgpu-family ${{ inputs.artifact_group }} \ + --artifact-id ${{ env.ARTIFACT_RUN_ID }} diff --git a/.github/workflows/build_portable_linux_artifacts.yml b/.github/workflows/build_portable_linux_artifacts.yml new file 
mode 100644 index 0000000000000..a1f7a87b61af2 --- /dev/null +++ b/.github/workflows/build_portable_linux_artifacts.yml @@ -0,0 +1,220 @@ +name: Build Portable Linux Artifacts + +on: + workflow_dispatch: + inputs: + amdgpu_families: + type: string + default: gfx94X-dcgpu + artifact_group: + type: string + default: gfx94X-dcgpu + build_variant_label: + type: string + description: "A label for the build variant (ex: 'release', 'asan')" + default: "release" + build_variant_suffix: + type: string + description: "The build variant suffix (ex: 'asan' suffix -> 'gfx94X-dcgpu-asan')" + default: "" + build_variant_cmake_preset: + type: string + description: "The name of the cmake preset to use for this build variant, matching an entry in CMakePresets.json (ex: 'linux-release-asan')" + default: "" + package_version: + type: string + default: ADHOCBUILD + expect_failure: + type: boolean + default: false + extra_cmake_options: + type: string + + workflow_call: + inputs: + package_version: + type: string + default: ADHOCBUILD + amdgpu_families: + type: string + artifact_group: + type: string + build_variant_label: + type: string + build_variant_suffix: + type: string + build_variant_cmake_preset: + type: string + expect_failure: + type: boolean + extra_cmake_options: + type: string + +# For details on the permissions required for OIDC, see: +# https://github.com/aws-actions/configure-aws-credentials?tab=readme-ov-file#oidc +permissions: + contents: read + +jobs: + build_portable_linux_artifacts: + name: Build (xfail ${{ inputs.expect_failure }}) + # azure-linux-scale-rocm is used for regular CI builds + # azure-linux-u2404-hx176-cpu-rocm is used for CI builds that require more resources (ex: ASAN builds) + runs-on: ${{ inputs.build_variant_label == 'asan' && 'azure-linux-u2404-hx176-cpu-rocm' || 'azure-linux-scale-rocm' }} + continue-on-error: ${{ inputs.expect_failure }} + timeout-minutes: 720 # 12 hour timeout + permissions: + id-token: write + container: + image: 
ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + options: -v /runner/config:/home/awsconfig/ + env: + AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini + CACHE_DIR: ${{ github.workspace }}/.container-cache + # The ccache.conf will be written by setup_ccache.py before this gets used. + CCACHE_CONFIGPATH: ${{ github.workspace }}/.ccache/ccache.conf + AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }} + TEATIME_FORCE_INTERACTIVE: 0 + IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }} + steps: + - name: Checkout TheRock repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + repository: "ROCm/TheRock" + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + fetch-depth: 10 + + - name: Install python deps + run: | + pip install -r requirements.txt + + # safe.directory must be set before Runner Health Status + - name: Adjust git config + run: | + git config --global --add safe.directory $PWD + git config fetch.parallel 10 + + # TODO: We shouldn't be using a cache on actual release branches, but it + # really helps for iteration time. 
+ - name: Setup ccache + run: | + ./build_tools/setup_ccache.py \ + --config-preset "github-oss-presubmit" \ + --dir "$(dirname $CCACHE_CONFIGPATH)" \ + --local-path "$CACHE_DIR/ccache" + + - name: Runner health status + run: | + ./build_tools/health_status.py + + - name: Test build_tools + run: | + python -m pytest build_tools/tests build_tools/github_actions/tests + + - name: Fetch sources + timeout-minutes: 30 + run: | + ./build_tools/fetch_sources.py --jobs 12 + + - name: "Checking out repository for llvm-project" + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + path: compiler/amd-llvm + + - name: "Checking out repository for spirv-llvm-translator" + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + repository: "ROCm/spirv-llvm-translator" + path: compiler/spirv-llvm-translator + ref: ${{ secrets.SPIRV_LLVM_TRANSLATOR_MAINLINE_REF }} + + - name: "Checking out repository for hipify" + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + repository: "ROCm/hipify" + path: compiler/hipify + ref: ${{ secrets.HIPIFY_MAINLINE_REF }} + + - name: Apply patches + run: | + cp -v patches/amd-mainline/llvm-project/*.patch compiler/amd-llvm + cd compiler/amd-llvm + git log -10 + git config --global --add safe.directory $PWD + find . -type f -name '*.patch' -exec git apply --check {} \; + find . 
-type f -name '*.patch' -exec git apply {} \; + git log -15 + cd - + + - name: TheRock and llvm SHA + run: | + git config --global --add safe.directory $PWD + git log -1 + cd compiler/amd-llvm/llvm + git log -3 + cd - + + - name: Configure Projects + env: + cmake_preset: ${{ inputs.build_variant_cmake_preset }} + amdgpu_families: ${{ inputs.amdgpu_families }} + package_version: ${{ inputs.package_version }} + extra_cmake_options: ${{ inputs.extra_cmake_options }} + BUILD_DIR: build + run: | + python3 build_tools/github_actions/build_configure.py --manylinux + + - name: Build therock-archives and therock-dist + run: | + cmake --build build --target therock-archives therock-dist -- -k 0 + + - name: Test Packaging + if: ${{ github.event.repository.name == 'TheRock' }} + run: | + ctest --test-dir build --output-on-failure + + - name: Report + if: ${{ !cancelled() }} + shell: bash + run: | + if [ -d "./build" ]; then + echo "Full SDK du:" + echo "------------" + du -h -d 1 build/dist/rocm + echo "Artifact Archives:" + echo "------------------" + ls -lh build/artifacts/*.tar.xz + echo "Artifacts:" + echo "----------" + du -h -d 1 build/artifacts + echo "CCache Stats:" + echo "-------------" + ccache -s -v + tail -v -n +1 .ccache/compiler_check_cache/* > build/logs/ccache_compiler_check_cache.log + else + echo "[ERROR] Build directory ./build does not exist. Skipping report!" + echo " This should only happen if the CI is cancelled before the build step." 
+ exit 1 + fi + + # Analyze ninja build log to generate per-component timing report + - name: Analyze Build Times + if: ${{ !cancelled() }} + run: | + python3 build_tools/analyze_build_times.py --build-dir build + + - name: Configure AWS Credentials for non-forked repos + if: ${{ always() && !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Post Build Upload + if: always() + run: | + python3 build_tools/github_actions/post_build_upload.py \ + --run-id ${{ github.run_id }} \ + --artifact-group "${{ inputs.artifact_group }}" \ + --build-dir build \ + --upload diff --git a/.github/workflows/build_portable_linux_python_packages.yml b/.github/workflows/build_portable_linux_python_packages.yml new file mode 100644 index 0000000000000..69390ff9f472f --- /dev/null +++ b/.github/workflows/build_portable_linux_python_packages.yml @@ -0,0 +1,95 @@ +name: Build Portable Linux Python Packages + +on: + workflow_dispatch: + inputs: + artifact_github_repo: + description: GitHub repository for artifact_run_id + type: string + default: ROCm/TheRock + artifact_run_id: + description: Workflow run ID to download artifacts from + type: string + default: "17865324892" # TODO: default to the most recent successful run (using a script) + artifact_group: + description: "The artifact group to build (ex: gfx94X-dcgpu, gfx101X-dgpu, gfx1151, gfx120X-all)" + type: string + package_version: + type: string + workflow_call: + inputs: + artifact_github_repo: + type: string + artifact_run_id: + type: string + default: "" + artifact_group: + type: string + package_version: + type: string + +permissions: + contents: read + +run-name: Build portable Linux Python Packages (${{ inputs.artifact_group }}, ${{ inputs.package_version }}) + +jobs: + build: + name: Build Python | ${{ inputs.artifact_group }} + # Note: 
GitHub-hosted runners run out of disk space for some gpu families + runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-linux-scale-rocm' || 'ubuntu-24.04' }} + env: + BUILD_IMAGE: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + ARTIFACT_RUN_ID: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}" + ARTIFACTS_DIR: "${{ github.workspace }}/artifacts" + PACKAGES_DIR: "${{ github.workspace }}/packages" + MANYLINUX: 1 + + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: "ROCm/TheRock" + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: '3.12' + + - name: Install Python requirements + run: pip install boto3 packaging piprepo setuptools + + # Note: we could fetch "all" artifacts if we wanted to include more files + - name: Fetch artifacts + env: + IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }} + run: | + python ./build_tools/fetch_artifacts.py \ + --run-github-repo=${{ inputs.artifact_github_repo }} \ + --run-id=${{ env.ARTIFACT_RUN_ID }} \ + --artifact-group=${{ inputs.artifact_group }} \ + --output-dir=${{ env.ARTIFACTS_DIR }} \ + _dev_ _lib_ _run_ + + - name: Build Python packages + run: | + ./build_tools/linux_portable_build.py \ + --image=${{ env.BUILD_IMAGE }} \ + --output-dir=${{ env.PACKAGES_DIR }} \ + --artifact-dir=${{ env.ARTIFACTS_DIR }} \ + --build-python-only \ + -- \ + "--version=${{ inputs.package_version }}" + + - name: Inspect Python packages + run: | + ls -la "${{ env.PACKAGES_DIR }}" + + # TODO(#1559): Sanity check (Linux can't find the directories, maybe Docker issues?) 
+ + # - name: Sanity check Python packages + # run: | + # piprepo build "${{ env.PACKAGES_DIR }}/dist" + # pip install rocm[devel]==${{ inputs.package_version }} \ + # --extra-index-url "${{ env.PACKAGES_DIR }}/dist/simple/" + # rocm-sdk test + + # TODO(#1559): upload packages to artifacts S3 bucket and/or a dedicated Python packages bucket diff --git a/.github/workflows/build_portable_linux_pytorch_wheels.yml b/.github/workflows/build_portable_linux_pytorch_wheels.yml new file mode 100644 index 0000000000000..59a811ee6c0f2 --- /dev/null +++ b/.github/workflows/build_portable_linux_pytorch_wheels.yml @@ -0,0 +1,325 @@ +name: Build Portable Linux PyTorch Wheels + +on: + workflow_call: + inputs: + amdgpu_family: + required: true + type: string + python_version: + required: true + type: string + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + required: true + type: string + s3_subdir: + description: S3 subdirectory, not including the GPU-family + required: true + type: string + s3_staging_subdir: + description: S3 staging subdirectory, not including the GPU-family + required: true + type: string + cloudfront_url: + description: CloudFront URL pointing to Python index + required: true + type: string + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + required: true + type: string + rocm_version: + description: ROCm version to pip install (e.g. "7.10.0a20251124") + type: string + pytorch_git_ref: + description: PyTorch ref to checkout. (typically "nightly", or "release/X.Y") + required: true + type: string + pytorch_patchset: + description: Patch directory name from where to apply existing patches. + required: true + type: string + repository: + description: "Repository to checkout. Otherwise, defaults to `github.repository`." + type: string + ref: + description: "Branch, tag or SHA to checkout. 
Defaults to the reference or SHA that triggered the workflow." + type: string + workflow_dispatch: + inputs: + amdgpu_family: + type: choice + options: + - gfx101X-dgpu + - gfx103X-dgpu + - gfx110X-all + - gfx1150 + - gfx1151 + - gfx120X-all + - gfx90X-dcgpu + - gfx94X-dcgpu + - gfx950-dcgpu + default: gfx94X-dcgpu + python_version: + required: true + type: string + default: "3.12" + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + type: string + default: "dev" + s3_subdir: + description: S3 subdirectory, not including the GPU-family + type: string + default: "v2" + s3_staging_subdir: + description: S3 staging subdirectory, not including the GPU-family + type: string + default: "v2-staging" + cloudfront_url: + description: CloudFront base URL pointing to Python index + type: string + default: "https://rocm.devreleases.amd.com/v2" + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + type: string + default: "https://rocm.devreleases.amd.com/v2-staging" + rocm_version: + description: ROCm version to pip install (e.g. "7.10.0a20251124") + type: string + pytorch_git_ref: + description: PyTorch ref to checkout. (typically "nightly", or "release/X.Y") + required: true + type: string + default: "release/2.7" + pytorch_patchset: + description: Patch directory name from where to apply existing patches. 
+ required: true + type: string + default: "rocm_2.7" + +permissions: + id-token: write + contents: read + +run-name: Build portable Linux PyTorch Wheels (${{ inputs.amdgpu_family }}, ${{ inputs.python_version }}, ${{ inputs.release_type }}) + +jobs: + build_pytorch_wheels: + name: Build | ${{ inputs.amdgpu_family }} | py ${{ inputs.python_version }} | torch ${{ inputs.pytorch_git_ref }} + runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-linux-scale-rocm' || 'ubuntu-24.04' }} + container: + image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + env: + OUTPUT_DIR: ${{ github.workspace }}/output + PACKAGE_DIST_DIR: ${{ github.workspace }}/output/packages/dist + S3_BUCKET_PY: "therock-${{ inputs.release_type }}-python" + optional_build_prod_arguments: "" + outputs: + cp_version: ${{ env.cp_version }} + # The following are python package versions produced by the build. The + # exact versions will depend on workflow inputs and the underlying code. + # For example: + # Inputs + # rocm_version : 7.10.0a20251120 + # pytorch_git_ref : release/2.9 + # Outputs + # torch_version : 2.9.1+rocm7.10.0a20251120 + # torchaudio_version : 2.9.0+rocm7.10.0a20251120 + # torchvision_version: 0.24.0+rocm7.10.0a20251120 + # triton_version : 3.5.1+rocm7.10.0a20251120 + # Future jobs can use these version outputs to identify newly built + # packages, for example via `pip install torch==${TORCH_VERSION}`. 
+ torch_version: ${{ steps.build-pytorch-wheels.outputs.torch_version }} + torchaudio_version: ${{ steps.build-pytorch-wheels.outputs.torchaudio_version }} + torchvision_version: ${{ steps.build-pytorch-wheels.outputs.torchvision_version }} + triton_version: ${{ steps.build-pytorch-wheels.outputs.triton_version }} + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Configure Git Identity + run: | + git config --global user.name "therockbot" + git config --global user.email "therockbot@amd.com" + + - name: Select Python version + run: | + python build_tools/github_actions/python_to_cp_version.py \ + --python-version ${{ inputs.python_version }} + + - name: Add selected Python version to PATH + run: | + python_dir="/opt/python/${{ env.cp_version }}" + if ! [ -x "${python_dir}/bin/python" ]; then + echo "ERROR: Could not find python: ${python_dir}" + exit 1 + fi + echo "${python_dir}/bin" >> "$GITHUB_PATH" + + # Checkout nightly sources from https://github.com/pytorch/pytorch + - name: Checkout PyTorch Source Repos from nightly branch + if: ${{ inputs.pytorch_git_ref == 'nightly' }} + run: | + ./external-builds/pytorch/pytorch_torch_repo.py checkout --repo-hashtag nightly + ./external-builds/pytorch/pytorch_audio_repo.py checkout --repo-hashtag nightly + ./external-builds/pytorch/pytorch_vision_repo.py checkout --repo-hashtag nightly + ./external-builds/pytorch/pytorch_triton_repo.py checkout --patch --patchset nightly + + # Checkout stable sources from https://github.com/ROCm/pytorch + - name: Checkout PyTorch Source Repos from stable branch + if: ${{ inputs.pytorch_git_ref != 'nightly' }} + run: | + ./external-builds/pytorch/pytorch_torch_repo.py checkout --gitrepo-origin https://github.com/ROCm/pytorch.git --repo-hashtag ${{ inputs.pytorch_git_ref }} --patchset ${{ inputs.pytorch_patchset }} + 
./external-builds/pytorch/pytorch_audio_repo.py checkout --require-related-commit + ./external-builds/pytorch/pytorch_vision_repo.py checkout --require-related-commit + ./external-builds/pytorch/pytorch_triton_repo.py checkout + + - name: Create pip cache directory + run: mkdir -p /tmp/pipcache + + - name: Determine optional arguments passed to `build_prod_wheels.py` + if: ${{ inputs.rocm_version }} + run: | + pip install packaging + python build_tools/github_actions/determine_version.py \ + --rocm-version ${{ inputs.rocm_version }} + + - name: Build PyTorch Wheels + id: build-pytorch-wheels + run: | + echo "Building PyTorch wheels for ${{ inputs.amdgpu_family }}" + ./external-builds/pytorch/build_prod_wheels.py \ + build \ + --install-rocm \ + --pip-cache-dir /tmp/pipcache \ + --index-url "${{ inputs.cloudfront_url }}/${{ inputs.amdgpu_family }}/" \ + --clean \ + --output-dir ${{ env.PACKAGE_DIST_DIR }} ${{ env.optional_build_prod_arguments }} + python ./build_tools/github_actions/write_torch_versions.py --dist-dir ${{ env.PACKAGE_DIST_DIR }} + + - name: Sanity Check Wheel + run: | + python external-builds/pytorch/sanity_check_wheel.py ${{ env.PACKAGE_DIST_DIR }}/ + + - name: Configure AWS Credentials + if: always() + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-${{ inputs.release_type }}-releases + + - name: Upload wheels to S3 staging + if: ${{ github.repository_owner == 'ROCm' }} + run: | + aws s3 cp ${{ env.PACKAGE_DIST_DIR }}/ s3://${{ env.S3_BUCKET_PY }}/${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}/ \ + --recursive --exclude "*" --include "*.whl" + + - name: (Re-)Generate Python package release index for staging + if: ${{ github.repository_owner == 'ROCm' }} + env: + # Environment variables to be set for `manage.py` + CUSTOM_PREFIX: "${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}" + run: | + pip 
install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${{ env.CUSTOM_PREFIX }} + + generate_target_to_run: + name: Generate target_to_run + runs-on: ubuntu-24.04 + outputs: + test_runs_on: ${{ steps.configure.outputs.test-runs-on }} + bypass_tests_for_releases: ${{ steps.configure.outputs.bypass_tests_for_releases }} + steps: + - name: Checking out repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Generating target to run + id: configure + env: + TARGET: ${{ inputs.amdgpu_family }} + PLATFORM: "linux" + # Variable comes from ROCm organization variable 'ROCM_THEROCK_TEST_RUNNERS' + ROCM_THEROCK_TEST_RUNNERS: ${{ vars.ROCM_THEROCK_TEST_RUNNERS }} + LOAD_TEST_RUNNERS_FROM_VAR: false + run: python ./build_tools/github_actions/configure_target_run.py + + test_pytorch_wheels: + name: Test | ${{ inputs.amdgpu_family }} | ${{ needs.generate_target_to_run.outputs.test_runs_on }} + if: ${{ needs.generate_target_to_run.outputs.test_runs_on != '' }} + needs: [build_pytorch_wheels, generate_target_to_run] + uses: ./.github/workflows/test_pytorch_wheels.yml + with: + amdgpu_family: ${{ inputs.amdgpu_family }} + test_runs_on: ${{ needs.generate_target_to_run.outputs.test_runs_on }} + package_index_url: ${{ inputs.cloudfront_staging_url }} + python_version: ${{ inputs.python_version }} + torch_version: ${{ needs.build_pytorch_wheels.outputs.torch_version }} + pytorch_git_ref: ${{ inputs.pytorch_git_ref }} + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + upload_pytorch_wheels: + name: Release PyTorch Wheels to S3 + needs: [build_pytorch_wheels, generate_target_to_run, test_pytorch_wheels] + if: ${{ !cancelled() }} + runs-on: ubuntu-24.04 + env: + S3_BUCKET_PY: "therock-${{ inputs.release_type }}-python" + CP_VERSION: "${{ 
needs.build_pytorch_wheels.outputs.cp_version }}" + TORCH_VERSION: "${{ needs.build_pytorch_wheels.outputs.torch_version }}" + TORCHAUDIO_VERSION: "${{ needs.build_pytorch_wheels.outputs.torchaudio_version }}" + TORCHVISION_VERSION: "${{ needs.build_pytorch_wheels.outputs.torchvision_version }}" + TRITON_VERSION: "${{ needs.build_pytorch_wheels.outputs.triton_version }}" + + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Configure AWS Credentials + if: always() + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-${{ inputs.release_type }}-releases + + - name: Determine upload flag + env: + BUILD_RESULT: ${{ needs.build_pytorch_wheels.result }} + TEST_RESULT: ${{ needs.test_pytorch_wheels.result }} + TEST_RUNS_ON: ${{ needs.generate_target_to_run.outputs.test_runs_on }} + BYPASS_TESTS_FOR_RELEASES: ${{ needs.generate_target_to_run.outputs.bypass_tests_for_releases }} + run: python ./build_tools/github_actions/promote_wheels_based_on_policy.py + + - name: Copy PyTorch wheels from staging to release S3 + if: ${{ env.upload == 'true' }} + run: | + echo "Copying exact tested wheels to release S3 bucket..." 
+ aws s3 cp \ + s3://${S3_BUCKET_PY}/${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}/ \ + s3://${S3_BUCKET_PY}/${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}/ \ + --recursive \ + --exclude "*" \ + --include "torch-${TORCH_VERSION}-${CP_VERSION}-linux_x86_64.whl" \ + --include "torchaudio-${TORCHAUDIO_VERSION}-${CP_VERSION}-linux_x86_64.whl" \ + --include "torchvision-${TORCHVISION_VERSION}-${CP_VERSION}-linux_x86_64.whl" \ + --include "triton-${TRITON_VERSION}-${CP_VERSION}-linux_x86_64.whl" + + - name: (Re-)Generate Python package release index + if: ${{ env.upload == 'true' }} + env: + # Environment variables to be set for `manage.py` + CUSTOM_PREFIX: "${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}" + run: | + pip install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${{ env.CUSTOM_PREFIX }} diff --git a/.github/workflows/build_windows_artifacts.yml b/.github/workflows/build_windows_artifacts.yml new file mode 100644 index 0000000000000..68ddfa76a1aab --- /dev/null +++ b/.github/workflows/build_windows_artifacts.yml @@ -0,0 +1,230 @@ +name: Build Windows Artifacts + +on: + workflow_dispatch: + inputs: + amdgpu_families: + type: string + default: gfx1151 + artifact_group: + type: string + default: gfx1151 + build_variant_label: + type: string + description: "A label for the build variant (ex: 'release', 'asan')" + default: "release" + build_variant_suffix: + type: string + description: "The build variant suffix (ex: 'asan' suffix -> 'gfx94X-dcgpu-asan')" + default: "" + build_variant_cmake_preset: + type: string + description: "The name of the cmake preset to use for this build variant, matching an entry in CMakePresets.json (ex: 'linux-release-asan')" + default: "" + package_version: + type: string + default: ADHOCBUILD + expect_failure: + type: boolean + extra_cmake_options: + type: string + + workflow_call: + inputs: + package_version: + type: string + default: ADHOCBUILD + amdgpu_families: + type: string + 
artifact_group: + type: string + build_variant_label: + type: string + build_variant_suffix: + type: string + build_variant_cmake_preset: + type: string + expect_failure: + type: boolean + extra_cmake_options: + type: string + +permissions: + contents: read + +jobs: + build_windows_artifacts: + name: Build ${{ inputs.build_variant_label }} (xfail ${{ inputs.expect_failure }}) + runs-on: azure-windows-scale-rocm + continue-on-error: ${{ inputs.expect_failure }} + timeout-minutes: 720 # 12 hour timeout + permissions: + id-token: write + defaults: + run: + shell: bash + strategy: + fail-fast: true + env: + BUILD_DIR: B:\build + CACHE_DIR: "${{github.workspace}}/.cache" + CCACHE_DIR: "${{github.workspace}}/.cache/ccache" + CCACHE_MAXSIZE: "4000M" + TEATIME_FORCE_INTERACTIVE: 0 + AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }} + IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }} + steps: + - name: Checkout TheRock repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + repository: "ROCm/TheRock" + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + fetch-depth: 10 + + - name: SHA of TheRock + run: | + git rev-parse HEAD + git log -1 + + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: 3.12 + + - name: Install python deps + run: | + pip install -r requirements.txt + + - name: Install requirements + # The first two lines remove the default community feed and use the internal proxy feed + run: | + choco source disable -n=chocolatey + choco source add -n=internal -s http://10.0.167.96:8081/repository/choco-group/ --priority=1 + choco install --no-progress -y ccache + # ninja pinned due to a bug in the 1.13.0 release: + # https://github.com/ninja-build/ninja/issues/2616 + choco install --no-progress -y ninja --version 1.12.1 + choco install --no-progress -y strawberryperl + echo "$PATH;C:\Strawberry\c\bin" >> $GITHUB_PATH + choco install --no-progress -y awscli + choco 
install --no-progress -y pkgconfiglite + echo "$PATH;C:\Program Files\Amazon\AWSCLIV2" >> $GITHUB_PATH + + - uses: iterative/setup-dvc@4bdfd2b0f6f1ad7e08afadb03b1a895c352a5239 # v2.0.0 + with: + version: '3.62.0' + + # After other installs, so MSVC gets priority in the PATH. + - name: Configure MSVC + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + + - name: Runner health status + run: | + ccache --zero-stats + python ./build_tools/health_status.py + + - name: Test build_tools + run: | + python -m pytest build_tools/tests build_tools/github_actions/tests + + # TODO: We shouldn't be using a cache on actual release branches, but it + # really helps for iteration time. + - name: Enable cache + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: ${{ env.CACHE_DIR }} + key: windows-build-packages-v4-${{ inputs.amdgpu_families }}-${{ github.sha }} + restore-keys: | + windows-build-packages-v4-${{ inputs.amdgpu_families }}- + + - name: Fetch sources + timeout-minutes: 30 + run: | + git config fetch.parallel 10 + git config --global core.symlinks true + git config --global core.longpaths true + python ./build_tools/fetch_sources.py --jobs 12 + + - name: "Checking out repository for llvm-project" + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + path: compiler/amd-llvm + + - name: Apply patches + run: | + cp -v patches/amd-mainline/llvm-project/*.patch compiler/amd-llvm + cd compiler/amd-llvm + git config --global --add safe.directory /__w/llvm-project/llvm-project + find . -type f -name '*.patch' -exec git apply --check {} \; + find . 
-type f -name '*.patch' -exec git apply {} \; + git log -15 + cd - + + - name: Configure Projects + env: + cmake_preset: ${{ inputs.build_variant_cmake_preset }} + amdgpu_families: ${{ inputs.amdgpu_families }} + package_version: ${{ inputs.package_version }} + extra_cmake_options: ${{ inputs.extra_cmake_options }} + run: | + # clear cache before build and after download + ccache -z + python3 build_tools/github_actions/build_configure.py + + - name: Build therock-archives and therock-dist + run: cmake --build "${{ env.BUILD_DIR }}" --target therock-archives therock-dist -- -k 0 + + - name: Report + if: ${{ !cancelled() }} + shell: bash + run: | + if [ -d "${{ env.BUILD_DIR }}" ]; then + echo "Build dir:" + echo "------------" + ls -lh "${{ env.BUILD_DIR }}" + echo "Artifact Archives:" + echo "------------------" + ls -lh "${{ env.BUILD_DIR }}"/artifacts/*.tar.xz + echo "Artifacts:" + echo "----------" + du -h -d 1 "${{ env.BUILD_DIR }}"/artifacts + echo "CCache Stats:" + echo "-------------" + ccache -s + else + echo "[ERROR] Build directory ${{ env.BUILD_DIR }} does not exist. Skipping report!" + echo " This should only happen if the CI is cancelled before the build step." 
+ exit 1 + fi + + - name: "Build size report" + if: always() + shell: powershell + run: | + $fs = Get-PSDrive -PSProvider "FileSystem" + $fsout = $fs | Select-Object -Property Name,Used,Free,Root + $fsout | % {$_.Used/=1GB;$_.Free/=1GB;$_} | Write-Host + get-disk | Select-object @{Name="Size(GB)";Expression={$_.Size/1GB}} | Write-Host + + - name: Configure AWS Credentials for non-forked repos + if: ${{ always() && !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + special-characters-workaround: true + + - name: Post Build Upload + if: always() + run: | + python3 build_tools/github_actions/post_build_upload.py \ + --run-id ${{ github.run_id }} \ + --artifact-group ${{ inputs.artifact_group }} \ + --build-dir ${{ env.BUILD_DIR }} \ + --upload + + - name: Save cache + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + if: ${{ !cancelled() }} + with: + path: ${{ env.CACHE_DIR }} + key: windows-build-packages-v4-${{ inputs.amdgpu_families }}-${{ github.sha }} diff --git a/.github/workflows/build_windows_python_packages.yml b/.github/workflows/build_windows_python_packages.yml new file mode 100644 index 0000000000000..40c3d184a0b8d --- /dev/null +++ b/.github/workflows/build_windows_python_packages.yml @@ -0,0 +1,87 @@ +name: Build Windows Python Packages + +on: + workflow_dispatch: + inputs: + artifact_github_repo: + description: GitHub repository for artifact_run_id + type: string + default: ROCm/TheRock + artifact_run_id: + description: Workflow run ID to download artifacts from + type: string + default: "17865324892" # TODO: default to the most recent successful run (using a script) + artifact_group: + description: "The artifact group to build (ex: gfx94X-dcgpu, gfx101X-dgpu, gfx1151, gfx120X-all)" + type: string + package_version: + type: string + 
workflow_call: + inputs: + artifact_github_repo: + type: string + artifact_run_id: + type: string + default: "" + artifact_group: + type: string + package_version: + type: string + +permissions: + contents: read + +jobs: + build: + name: Build Python | ${{ inputs.artifact_group }} + runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-windows-scale-rocm' || 'windows-2022' }} + env: + ARTIFACT_RUN_ID: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}" + ARTIFACTS_DIR: "${{ github.workspace }}/artifacts" + PACKAGES_DIR: "${{ github.workspace }}/packages" + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: "ROCm/TheRock" + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: '3.12' + + - name: Install Python requirements + run: pip install boto3 packaging piprepo setuptools + + # Note: we could fetch "all" artifacts if we wanted to include more files + - name: Fetch artifacts + env: + IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }} + run: | + python ./build_tools/fetch_artifacts.py \ + --run-github-repo=${{ inputs.artifact_github_repo }} \ + --run-id=${{ env.ARTIFACT_RUN_ID }} \ + --artifact-group=${{ inputs.artifact_group }} \ + --output-dir="${{ env.ARTIFACTS_DIR }}" \ + _dev_ _lib_ _run_ + + - name: Build Python packages + run: | + python ./build_tools/build_python_packages.py \ + --artifact-dir="${{ env.ARTIFACTS_DIR }}" \ + --dest-dir="${{ env.PACKAGES_DIR }}" \ + --version="${{ inputs.package_version }}" + + - name: Inspect Python packages + run: | + ls -la "${{ env.PACKAGES_DIR }}" + + - name: Sanity check Python packages + run: | + piprepo build "${{ env.PACKAGES_DIR }}/dist" + pip install rocm[libraries,devel]==${{ inputs.package_version }} \ + --extra-index-url "${{ env.PACKAGES_DIR }}/dist/simple/" + rocm-sdk test + + # 
TODO(#1559): upload packages to artifacts S3 bucket and/or a dedicated Python packages bucket diff --git a/.github/workflows/build_windows_pytorch_wheels.yml b/.github/workflows/build_windows_pytorch_wheels.yml new file mode 100644 index 0000000000000..aa1fc5d43a75f --- /dev/null +++ b/.github/workflows/build_windows_pytorch_wheels.yml @@ -0,0 +1,357 @@ +name: Build Windows PyTorch Wheels + +on: + workflow_call: + inputs: + amdgpu_family: + required: true + type: string + python_version: + required: true + type: string + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + required: true + type: string + s3_subdir: + description: S3 subdirectory, not including the GPU-family + required: true + type: string + s3_staging_subdir: + description: S3 staging subdirectory, not including the GPU-family + required: true + type: string + cloudfront_url: + description: CloudFront URL pointing to Python index + required: true + type: string + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + required: true + type: string + rocm_version: + description: ROCm version to pip install (e.g. "7.10.0a20251124") + type: string + pytorch_git_ref: + description: PyTorch ref to checkout. (typically "nightly", or "release/X.Y") + required: true + type: string + pytorch_patchset: + description: Patch directory name from where to apply existing patches. + required: true + type: string + repository: + description: "Repository to checkout. Otherwise, defaults to `github.repository`." + type: string + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." 
+ type: string + workflow_dispatch: + inputs: + amdgpu_family: + type: choice + options: + - gfx101X-dgpu + - gfx103X-dgpu + - gfx110X-all + - gfx1150 + - gfx1151 + - gfx120X-all + - gfx90X-dcgpu + - gfx94X-dcgpu + - gfx950-dcgpu + default: gfx1151 + python_version: + required: true + type: string + default: "3.12" + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + type: string + default: "dev" + s3_subdir: + description: S3 subdirectory, not including the GPU-family + type: string + default: "v2" + s3_staging_subdir: + description: S3 staging subdirectory, not including the GPU-family + type: string + default: "v2-staging" + cloudfront_url: + description: CloudFront base URL pointing to Python index + type: string + default: "https://rocm.devreleases.amd.com/v2" + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + type: string + default: "https://rocm.devreleases.amd.com/v2-staging" + rocm_version: + description: ROCm version to pip install (e.g. "7.10.0a20251124") + type: string + pytorch_git_ref: + description: PyTorch ref to checkout. (typically "nightly", or "release/X.Y") + required: true + type: string + default: "release/2.7" + pytorch_patchset: + description: Patch directory name from where to apply existing patches. + required: true + type: string + default: "rocm_2.7" + +permissions: + id-token: write + contents: read + +jobs: + build_pytorch_wheels: + name: Build | ${{ inputs.amdgpu_family }} | py ${{ inputs.python_version }} | torch ${{ inputs.pytorch_git_ref }} + runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-windows-scale-rocm' || 'windows-2022' }} + env: + CHECKOUT_ROOT: B:/src + OUTPUT_DIR: ${{ github.workspace }}/output + # Note the \ here instead of /. This should be used from 'cmd' not 'bash'! 
+ PACKAGE_DIST_DIR: ${{ github.workspace }}\output\packages\dist + S3_BUCKET_PY: "therock-${{ inputs.release_type }}-python" + optional_build_prod_arguments: "" + outputs: + cp_version: ${{ env.cp_version }} + # The following are python package versions produced by the build. The + # exact versions will depend on workflow inputs and the underlying code. + # For example: + # Inputs + # rocm_version : 7.10.0a20251120 + # pytorch_git_ref : release/2.9 + # Outputs + # torch_version : 2.9.1+rocm7.10.0a20251120 + # torchaudio_version : 2.9.0+rocm7.10.0a20251120 + # torchvision_version: 0.24.0+rocm7.10.0a20251120 + # Future jobs can use these version outputs to identify newly built + # packages, for example via `pip install torch==${TORCH_VERSION}`. + torch_version: ${{ steps.build-pytorch-wheels.outputs.torch_version }} + torchaudio_version: ${{ steps.build-pytorch-wheels.outputs.torchaudio_version }} + torchvision_version: ${{ steps.build-pytorch-wheels.outputs.torchvision_version }} + + defaults: + run: + # Note: there are mixed uses of 'bash' (this default) and 'cmd' below + shell: bash + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Configure Git Identity + run: | + git config --global user.name "therockbot" + git config --global user.email "therockbot@amd.com" + + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ inputs.python_version }} + + - name: Select Python version + run: | + python build_tools/github_actions/python_to_cp_version.py \ + --python-version ${{ inputs.python_version }} + + # TODO(amd-justchen): share with build_windows_artifacts.yml. Include in VM image? Dockerfile? 
+ - name: Install requirements + run: | + choco install --no-progress -y ninja --version 1.13.1 + choco install --no-progress -y awscli + echo "$PATH;C:\Program Files\Amazon\AWSCLIV2" >> $GITHUB_PATH + + # After other installs, so MSVC gets priority in the PATH. + - name: Configure MSVC + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + + # Checkout nightly sources from https://github.com/pytorch/pytorch + # TODO: switch to 'nightly' to match our Linux workflows? + - name: Checkout PyTorch source repos (nightly branch) + if: ${{ inputs.pytorch_git_ref == 'nightly' }} + run: | + git config --global core.longpaths true + python ./external-builds/pytorch/pytorch_torch_repo.py checkout \ + --checkout-dir ${{ env.CHECKOUT_ROOT }}/torch \ + --repo-hashtag nightly + python ./external-builds/pytorch/pytorch_audio_repo.py checkout \ + --checkout-dir ${{ env.CHECKOUT_ROOT }}/audio \ + --repo-hashtag nightly + python ./external-builds/pytorch/pytorch_vision_repo.py checkout \ + --checkout-dir ${{ env.CHECKOUT_ROOT }}/vision \ + --repo-hashtag nightly + + # Checkout stable sources from https://github.com/ROCm/pytorch + - name: Checkout PyTorch Source Repos from stable branch + if: ${{ inputs.pytorch_git_ref != 'nightly' }} + run: | + git config --global core.longpaths true + python ./external-builds/pytorch/pytorch_torch_repo.py checkout \ + --checkout-dir ${{ env.CHECKOUT_ROOT }}/torch \ + --gitrepo-origin https://github.com/ROCm/pytorch.git \ + --repo-hashtag ${{ inputs.pytorch_git_ref }} \ + --patchset ${{ inputs.pytorch_patchset }} + python ./external-builds/pytorch/pytorch_audio_repo.py checkout \ + --checkout-dir ${{ env.CHECKOUT_ROOT }}/audio \ + --torch-dir ${{ env.CHECKOUT_ROOT }}/torch \ + --require-related-commit + python ./external-builds/pytorch/pytorch_vision_repo.py checkout \ + --checkout-dir ${{ env.CHECKOUT_ROOT }}/vision \ + --torch-dir ${{ env.CHECKOUT_ROOT }}/torch \ + --require-related-commit + + - name: Determine optional 
arguments passed to `build_prod_wheels.py` + if: ${{ inputs.rocm_version }} + run: | + pip install packaging + python build_tools/github_actions/determine_version.py \ + --rocm-version ${{ inputs.rocm_version }} + + - name: Build PyTorch Wheels + id: build-pytorch-wheels + # Using 'cmd' here is load bearing! There are configuration issues when + # run under 'bash': https://github.com/ROCm/TheRock/issues/827#issuecomment-3025858800 + shell: cmd + run: | + echo "Building PyTorch wheels for ${{ inputs.amdgpu_family }}" + python ./external-builds/pytorch/build_prod_wheels.py ^ + build ^ + --install-rocm ^ + --index-url "${{ inputs.cloudfront_url }}/${{ inputs.amdgpu_family }}/" ^ + --pytorch-dir ${{ env.CHECKOUT_ROOT }}/torch ^ + --pytorch-audio-dir ${{ env.CHECKOUT_ROOT }}/audio ^ + --pytorch-vision-dir ${{ env.CHECKOUT_ROOT }}/vision ^ + --enable-pytorch-flash-attention-windows ^ + --clean ^ + --output-dir ${{ env.PACKAGE_DIST_DIR }} ^ + ${{ env.optional_build_prod_arguments }} + python ./build_tools/github_actions/write_torch_versions.py --dist-dir ${{ env.PACKAGE_DIST_DIR }} + + - name: Sanity Check Wheel + shell: cmd + run: | + python external-builds/pytorch/sanity_check_wheel.py ${{ env.PACKAGE_DIST_DIR }} + + - name: Configure AWS Credentials + if: always() + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-${{ inputs.release_type }}-releases + special-characters-workaround: true + + - name: Upload wheels to S3 staging + if: ${{ github.repository_owner == 'ROCm' }} + # Using 'cmd' here since PACKAGE_DIST_DIR uses \ in paths instead of / + shell: cmd + run: | + aws s3 cp ${{ env.PACKAGE_DIST_DIR }}/ ^ + s3://${{ env.S3_BUCKET_PY }}/${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}/ ^ + --recursive --exclude "*" --include "*.whl" + + - name: (Re-)Generate Python package release index for staging + if: ${{ 
github.repository_owner == 'ROCm' }} + env: + # Environment variables to be set for `manage.py` + CUSTOM_PREFIX: "${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}" + shell: cmd + run: | + pip install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${{ env.CUSTOM_PREFIX }} + + generate_target_to_run: + name: Generate target_to_run + runs-on: ubuntu-24.04 + outputs: + test_runs_on: ${{ steps.configure.outputs.test-runs-on }} + bypass_tests_for_releases: ${{ steps.configure.outputs.bypass_tests_for_releases }} + steps: + - name: Checking out repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Generating target to run + id: configure + env: + TARGET: ${{ inputs.amdgpu_family }} + PLATFORM: "windows" + # Variable comes from ROCm organization variable 'ROCM_THEROCK_TEST_RUNNERS' + ROCM_THEROCK_TEST_RUNNERS: ${{ vars.ROCM_THEROCK_TEST_RUNNERS }} + LOAD_TEST_RUNNERS_FROM_VAR: false + run: python ./build_tools/github_actions/configure_target_run.py + + test_pytorch_wheels: + name: Test | ${{ inputs.amdgpu_family }} | ${{ needs.generate_target_to_run.outputs.test_runs_on }} + if: ${{ needs.generate_target_to_run.outputs.test_runs_on != '' }} + needs: [build_pytorch_wheels, generate_target_to_run] + uses: ./.github/workflows/test_pytorch_wheels.yml + with: + amdgpu_family: ${{ inputs.amdgpu_family }} + test_runs_on: ${{ needs.generate_target_to_run.outputs.test_runs_on }} + package_index_url: ${{ inputs.cloudfront_staging_url }} + python_version: ${{ inputs.python_version }} + torch_version: ${{ needs.build_pytorch_wheels.outputs.torch_version }} + pytorch_git_ref: ${{ inputs.pytorch_git_ref }} + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + upload_pytorch_wheels: + name: Release PyTorch Wheels to S3 + needs: [build_pytorch_wheels, 
generate_target_to_run, test_pytorch_wheels] + if: ${{ !cancelled() }} + runs-on: ubuntu-24.04 + env: + S3_BUCKET_PY: "therock-${{ inputs.release_type }}-python" + CP_VERSION: "${{ needs.build_pytorch_wheels.outputs.cp_version }}" + TORCH_VERSION: "${{ needs.build_pytorch_wheels.outputs.torch_version }}" + TORCHAUDIO_VERSION: "${{ needs.build_pytorch_wheels.outputs.torchaudio_version }}" + TORCHVISION_VERSION: "${{ needs.build_pytorch_wheels.outputs.torchvision_version }}" + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Configure AWS Credentials + if: always() + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-${{ inputs.release_type }}-releases + special-characters-workaround: true + + - name: Determine upload flag + env: + BUILD_RESULT: ${{ needs.build_pytorch_wheels.result }} + TEST_RESULT: ${{ needs.test_pytorch_wheels.result }} + TEST_RUNS_ON: ${{ needs.generate_target_to_run.outputs.test_runs_on }} + BYPASS_TESTS_FOR_RELEASES: ${{ needs.generate_target_to_run.outputs.bypass_tests_for_releases }} + run: python ./build_tools/github_actions/promote_wheels_based_on_policy.py + + - name: Copy PyTorch wheels from staging to release S3 + if: ${{ env.upload == 'true' }} + run: | + echo "Copying exact tested wheels to release S3 bucket..." 
+ aws s3 cp \ + s3://${S3_BUCKET_PY}/${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}/ \ + s3://${S3_BUCKET_PY}/${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}/ \ + --recursive \ + --exclude "*" \ + --include "torch-${TORCH_VERSION}-${CP_VERSION}-win_amd64.whl" \ + --include "torchaudio-${TORCHAUDIO_VERSION}-${CP_VERSION}-win_amd64.whl" \ + --include "torchvision-${TORCHVISION_VERSION}-${CP_VERSION}-win_amd64.whl" + + - name: (Re-)Generate Python package release index + if: ${{ env.upload == 'true' }} + env: + # Environment variables to be set for `manage.py` + CUSTOM_PREFIX: "${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}" + run: | + pip install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${{ env.CUSTOM_PREFIX }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000000..d131226a8d3b8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,138 @@ +# This CI workflow is triggered by: +# - push to main branch +# - pull request +# - workflow dispatch +# +# For pull requests, we run default builds and tests for: +# - Linux: gfx94X gfx110X +# - Windows: gfx110X +# If you want to trigger jobs for additional targets, please add a defined label (ex: gfx120X-linux) to the pull request +# +# For push to main branch, all AMD families will built and tested from `amdgpu_family_matrix.py`. +# +# Note: If a test machine is not available for a specific AMD GPU family in `amdgpu_family_matrix.py`, tests will be skipped. + +name: CI + +on: + push: + branches: + - main + workflow_dispatch: + inputs: + linux_amdgpu_families: + type: string + description: "Insert comma-separated list of Linux GPU families to build and test. ex: gfx94X, gfx1201X" + default: "" + linux_test_labels: + type: string + description: "If enabled, reduce test set on Linux to the list of labels prefixed with 'test:'. 
ex: test:rocprim, test:hipcub" + default: "" + linux_use_prebuilt_artifacts: + type: boolean + description: "If enabled, the CI will pull Linux artifacts using artifact_run_id and only run tests" + windows_amdgpu_families: + type: string + description: "Insert comma-separated list of Windows GPU families to build and test. ex: gfx94X, gfx1201X" + default: "" + windows_test_labels: + type: string + description: "If enabled, reduce test set on Windows to the list of labels prefixed with 'test:' ex: test:rocprim, test:hipcub" + default: "" + windows_use_prebuilt_artifacts: + type: boolean + description: "If enabled, the CI will pull Windows artifacts using artifact_run_id and only run tests" + artifact_run_id: + type: string + description: "If provided, the tests will run on this artifact ID" + default: "" + pull_request: + types: + - labeled + - opened + - synchronize + +permissions: + contents: read + +concurrency: + # A PR number if a pull request and otherwise the commit hash. This cancels + # queued and in-progress runs for the same PR (presubmit) or commit + # (postsubmit). The workflow name is prepended to avoid conflicts between + # different workflows. 
+  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  setup:
+    uses: ./.github/workflows/setup.yml
+    with:
+      build_variant: "release"
+
+  linux_build_and_test:
+    name: Linux::${{ matrix.variant.family }}::${{ matrix.variant.build_variant_label }}
+    needs: setup
+    if: >-
+      ${{
+        needs.setup.outputs.linux_variants != '[]' &&
+        needs.setup.outputs.enable_build_jobs == 'true'
+      }}
+    strategy:
+      fail-fast: false
+      matrix:
+        variant: ${{ fromJSON(needs.setup.outputs.linux_variants) }}
+    uses: ./.github/workflows/ci_linux.yml
+    secrets: inherit
+    with:
+      amdgpu_families: ${{ matrix.variant.family }}
+      artifact_group: ${{ matrix.variant.artifact_group }}
+      test_runs_on: ${{ matrix.variant.test-runs-on }}
+      build_variant_label: ${{ matrix.variant.build_variant_label }}
+      build_variant_suffix: ${{ matrix.variant.build_variant_suffix }}
+      build_variant_cmake_preset: ${{ matrix.variant.build_variant_cmake_preset }}
+      test_labels: ${{ needs.setup.outputs.linux_test_labels }}
+      artifact_run_id: ${{ inputs.artifact_run_id }}
+      expect_failure: ${{ matrix.variant.expect_failure == true }}
+      use_prebuilt_artifacts: ${{ inputs.linux_use_prebuilt_artifacts == true && 'true' || 'false' }}
+      rocm_package_version: ${{ needs.setup.outputs.rocm_package_version }}
+      test_type: ${{ needs.setup.outputs.test_type }}
+      sanity_check_only_for_family: ${{ matrix.variant.sanity_check_only_for_family == true }}
+    permissions:
+      contents: read
+      id-token: write
+
+  # build_python_packages:
+  #   name: Build Python Packages
+  #   uses: ./.github/workflows/build_python_packages.yml
+
+  # Aggregate job used as the single required status check for the workflow.
+  ci_summary:
+    name: CI Summary
+    if: always()
+    needs:
+      - setup
+      - linux_build_and_test
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Output failed jobs
+        run: |
+          echo '${{ toJson(needs) }}'
+
+          # Build a list of failed jobs, but ignore those marked continue-on-error.
+          # NOTE(review): job outputs are strings in the `needs` context, and in
+          # jq every non-empty string (including "false") is truthy, so the
+          # previous `(.value.outputs.continue_on_error | not)` test would have
+          # silently ignored failures from any job that set the output to
+          # "false". Compare the string value explicitly instead; `// "false"`
+          # keeps jobs that never set the output in the failed list.
+          FAILED_JOBS="$(echo '${{ toJson(needs) }}' \
+            | jq --raw-output '
+              to_entries
+              | map(select(
+                (.value.result != "success" and .value.result != "skipped")
+                and ((.value.outputs.continue_on_error // "false") != "true")
+              ))
+              | map(.key)
+              | join(",")
+            ' \
+          )"
+
+          if [[ -n "${FAILED_JOBS}" ]]; then
+            echo "The following jobs failed: ${FAILED_JOBS}"
+            exit 1
+          else
+            echo "All required jobs succeeded (continue-on-error jobs ignored)."
+          fi
diff --git a/.github/workflows/ci_asan.yml b/.github/workflows/ci_asan.yml
new file mode 100644
index 0000000000000..4da6ce0b14d11
--- /dev/null
+++ b/.github/workflows/ci_asan.yml
@@ -0,0 +1,67 @@
+name: CI ASAN
+
+on:
+  schedule:
+    - cron: "0 2 * * *" # Runs nightly at 2 AM UTC
+  workflow_dispatch:
+    inputs:
+      linux_amdgpu_families:
+        type: string
+        description: "Insert comma-separated list of Linux GPU families to build and test. ex: gfx94X, gfx1201X"
+        default: ""
+      linux_use_prebuilt_artifacts:
+        type: boolean
+        description: "If enabled, the CI will pull Linux artifacts using artifact_run_id and only run tests"
+      artifact_run_id:
+        type: string
+        description: "If provided, the tests will run on this artifact ID"
+        default: ""
+
+permissions:
+  contents: read
+
+concurrency:
+  # Group on the PR number when run from a pull request, otherwise on the
+  # commit hash. This workflow only has schedule/workflow_dispatch triggers,
+  # so in practice the group keys on the commit SHA. The workflow name is
+  # prepended to avoid conflicts between different workflows.
+ group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + setup: + uses: ./.github/workflows/setup.yml + with: + build_variant: "asan" + + linux_build_and_test: + name: Linux::${{ matrix.variant.family }}::${{ matrix.variant.build_variant_label }} + needs: setup + if: >- + ${{ + needs.setup.outputs.linux_variants != '[]' && + needs.setup.outputs.enable_build_jobs == 'true' + }} + strategy: + fail-fast: false + matrix: + variant: ${{ fromJSON(needs.setup.outputs.linux_variants) }} + uses: ./.github/workflows/ci_linux.yml + secrets: inherit + with: + amdgpu_families: ${{ matrix.variant.family }} + artifact_group: ${{ matrix.variant.artifact_group }} + test_runs_on: ${{ matrix.variant.test-runs-on }} + build_variant_label: ${{ matrix.variant.build_variant_label }} + build_variant_suffix: ${{ matrix.variant.build_variant_suffix }} + build_variant_cmake_preset: ${{ matrix.variant.build_variant_cmake_preset }} + test_labels: ${{ needs.setup.outputs.linux_test_labels }} + artifact_run_id: ${{ inputs.artifact_run_id }} + expect_failure: ${{ matrix.variant.expect_failure == true }} + use_prebuilt_artifacts: ${{ inputs.linux_use_prebuilt_artifacts == true && 'true' || 'false' }} + rocm_package_version: ${{ needs.setup.outputs.rocm_package_version }} + test_type: ${{ needs.setup.outputs.test_type }} + sanity_check_only_for_family: ${{ matrix.variant.sanity_check_only_for_family == true }} + permissions: + contents: read + id-token: write diff --git a/.github/workflows/ci_linux.yml b/.github/workflows/ci_linux.yml new file mode 100644 index 0000000000000..e9522b323870d --- /dev/null +++ b/.github/workflows/ci_linux.yml @@ -0,0 +1,108 @@ +name: CI - Linux + +on: + workflow_call: + inputs: + artifact_group: + type: string + amdgpu_families: + type: string + build_variant_label: + type: string + build_variant_cmake_preset: + type: string + build_variant_suffix: + type: string + test_labels: + type: string + artifact_run_id: + 
+        type: string
+      test_runs_on:
+        type: string
+      expect_failure:
+        type: boolean
+      use_prebuilt_artifacts:
+        type: string
+      rocm_package_version:
+        type: string
+      test_type:
+        type: string
+      sanity_check_only_for_family:
+        type: boolean
+
+permissions:
+  contents: read
+
+jobs:
+  build_portable_linux_artifacts:
+    name: Build Artifacts
+    if: ${{ inputs.use_prebuilt_artifacts == 'false' }}
+    uses: ./.github/workflows/build_portable_linux_artifacts.yml
+    secrets: inherit
+    with:
+      artifact_group: ${{ inputs.artifact_group }}
+      package_version: ${{ inputs.rocm_package_version }}
+      amdgpu_families: ${{ inputs.amdgpu_families }}
+      build_variant_label: ${{ inputs.build_variant_label }}
+      build_variant_cmake_preset: ${{ inputs.build_variant_cmake_preset }}
+      build_variant_suffix: ${{ inputs.build_variant_suffix }}
+      expect_failure: ${{ inputs.expect_failure }}
+    permissions:
+      contents: read
+      id-token: write
+
+  # TODO: rework "artifact_run_id" and "use_prebuilt_artifacts" here?
+  # I don't want to copy/paste this condition and special case plumbing
+  # through multiple workflows. All the packaging and testing workflows need
+  # to know is what artifact run id to use. That could be the current
+  # (implicit) run id, or it could be an explicit run id.
+  # How about having the "build artifacts" job run as a passthrough?
+
+  test_linux_artifacts:
+    needs: [build_portable_linux_artifacts]
+    name: Test Artifacts
+    # If the dependent job failed/cancelled, this job will not be run.
+    # The use_prebuilt_artifacts "or" statement ensures that tests will run
+    # whether the previous build step ran or was skipped.
+    # If we are expecting a build failure, do not run tests to save machine capacity.
+    if: >-
+      ${{
+        !failure() &&
+        !cancelled() &&
+        (
+          inputs.use_prebuilt_artifacts == 'false' ||
+          inputs.use_prebuilt_artifacts == 'true'
+        ) &&
+        inputs.expect_failure == false
+      }}
+    uses: ./.github/workflows/test_artifacts.yml
+    with:
+      artifact_group: ${{ inputs.artifact_group }}
+      amdgpu_families: ${{ inputs.amdgpu_families }}
+      test_runs_on: ${{ inputs.test_runs_on }}
+      artifact_run_id: ${{ inputs.artifact_run_id }}
+      test_type: ${{ inputs.test_type }}
+      test_labels: ${{ inputs.test_labels }}
+      sanity_check_only_for_family: ${{ inputs.sanity_check_only_for_family == true }}
+
+  build_portable_linux_python_packages:
+    needs: [build_portable_linux_artifacts]
+    name: Build Python
+    # If the dependent job failed/cancelled, this job will not be run.
+    # The use_prebuilt_artifacts "or" statement ensures that packaging will run
+    # whether the previous build step ran or was skipped.
+    if: >-
+      ${{
+        !failure() &&
+        !cancelled() &&
+        (
+          inputs.use_prebuilt_artifacts == 'false' ||
+          inputs.use_prebuilt_artifacts == 'true'
+        ) &&
+        inputs.expect_failure == false
+      }}
+    uses: ./.github/workflows/build_portable_linux_python_packages.yml
+    with:
+      artifact_run_id: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}"
+      artifact_group: ${{ inputs.artifact_group }}
+      package_version: ${{ inputs.rocm_package_version }}
diff --git a/.github/workflows/ci_nightly.yml b/.github/workflows/ci_nightly.yml
new file mode 100644
index 0000000000000..e15f5e887a077
--- /dev/null
+++ b/.github/workflows/ci_nightly.yml
@@ -0,0 +1,124 @@
+# This CI workflow is triggered by:
+# - scheduled run
+#
+# In the scheduled run, we run all targets from amdgpu_family_matrix.py and amdgpu_family_matrix_xfail.py
+# As some of these builds are xfail, we allow errors to occur with `continue-on-error`, where the job will fail but the workflow is green
+
+name: CI Nightly
+
+on:
+  # For AMD GPU
families that expect_failure, we run builds and tests from this scheduled trigger + schedule: + - cron: "0 2 * * *" # Runs nightly at 2 AM UTC + workflow_dispatch: + inputs: + linux_amdgpu_families: + type: string + description: "Insert comma-separated list of Linux GPU families to build and test. ex: gfx94X, gfx1201X" + default: "" + linux_test_labels: + type: string + description: "If enabled, reduce test set on Linux to the list of labels prefixed with 'test:'" + default: "" + linux_use_prebuilt_artifacts: + type: boolean + description: "If enabled, the CI will pull Linux artifacts using artifact_run_id and only run tests" + windows_amdgpu_families: + type: string + description: "Insert comma-separated list of Windows GPU families to build and test. ex: gfx94X, gfx1201X" + default: "" + windows_test_labels: + type: string + description: "If enabled, reduce test set on Windows to the list of labels prefixed with 'test:'" + default: "" + windows_use_prebuilt_artifacts: + type: boolean + description: "If enabled, the CI will pull Windows artifacts using artifact_run_id and only run tests" + artifact_run_id: + type: string + description: "If provided, the tests will run on this artifact ID" + default: "" + +permissions: + contents: read + +concurrency: + # A PR number if a pull request and otherwise the commit hash. This cancels + # queued and in-progress runs for the same PR (presubmit) or commit + # (postsubmit). The workflow name is prepended to avoid conflicts between + # different workflows. 
+ group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + setup: + uses: ./.github/workflows/setup.yml + with: + build_variant: "release" + + linux_build_and_test: + name: Linux::${{ matrix.variant.family }}::${{ matrix.variant.build_variant_label }} + needs: setup + if: >- + ${{ + needs.setup.outputs.linux_variants != '[]' && + needs.setup.outputs.enable_build_jobs == 'true' + }} + strategy: + fail-fast: false + matrix: + variant: ${{ fromJSON(needs.setup.outputs.linux_variants) }} + uses: ./.github/workflows/ci_linux.yml + secrets: inherit + with: + amdgpu_families: ${{ matrix.variant.family }} + artifact_group: ${{ matrix.variant.artifact_group }} + test_runs_on: ${{ matrix.variant.test-runs-on }} + build_variant_label: ${{ matrix.variant.build_variant_label }} + build_variant_suffix: ${{ matrix.variant.build_variant_suffix }} + build_variant_cmake_preset: ${{ matrix.variant.build_variant_cmake_preset }} + test_labels: ${{ needs.setup.outputs.linux_test_labels }} + artifact_run_id: ${{ inputs.artifact_run_id }} + expect_failure: ${{ matrix.variant.expect_failure == true }} + use_prebuilt_artifacts: ${{ inputs.linux_use_prebuilt_artifacts == true && 'true' || 'false' }} + rocm_package_version: ${{ needs.setup.outputs.rocm_package_version }} + test_type: ${{ needs.setup.outputs.test_type }} + sanity_check_only_for_family: ${{ matrix.variant.sanity_check_only_for_family == true }} + permissions: + contents: read + id-token: write + + windows_build_and_test: + name: Windows::${{ matrix.variant.family }}::${{ matrix.variant.build_variant_label }} + needs: setup + if: >- + ${{ + needs.setup.outputs.windows_variants != '[]' && + needs.setup.outputs.enable_build_jobs == 'true' + }} + strategy: + fail-fast: false + matrix: + variant: ${{ fromJSON(needs.setup.outputs.windows_variants) }} + uses: ./.github/workflows/ci_windows.yml + with: + amdgpu_families: ${{ matrix.variant.family }} + artifact_group: ${{ 
matrix.variant.artifact_group }} + test_runs_on: ${{ matrix.variant.test-runs-on }} + build_variant_label: ${{ matrix.variant.build_variant_label }} + build_variant_suffix: ${{ matrix.variant.build_variant_suffix }} + build_variant_cmake_preset: ${{ matrix.variant.build_variant_cmake_preset }} + test_labels: ${{ needs.setup.outputs.windows_test_labels }} + artifact_run_id: ${{ inputs.artifact_run_id }} + expect_failure: ${{ matrix.variant.expect_failure == true }} + use_prebuilt_artifacts: ${{ inputs.windows_use_prebuilt_artifacts == true && 'true' || 'false' }} + rocm_package_version: ${{ needs.setup.outputs.rocm_package_version }} + test_type: ${{ needs.setup.outputs.test_type }} + sanity_check_only_for_family: ${{ matrix.variant.sanity_check_only_for_family == true }} + permissions: + contents: read + id-token: write + + # build_python_packages: + # name: Build Python Packages + # uses: ./.github/workflows/build_python_packages.yml diff --git a/.github/workflows/ci_weekly.yml b/.github/workflows/ci_weekly.yml new file mode 100644 index 0000000000000..9570a74f3f7e1 --- /dev/null +++ b/.github/workflows/ci_weekly.yml @@ -0,0 +1,14 @@ +name: WIP Placeholder CI Weekly + +on: + # For AMD GPU families that expect_failure, we run builds and tests from this scheduled trigger + # schedule: + # - cron: "0 3 * * 0" # Runs weekly at 3 AM UTC Sundays + workflow_dispatch: + + +jobs: + donothing: + runs-on: ubuntu-latest + steps: + - run: echo "Skipped" diff --git a/.github/workflows/ci_windows.yml b/.github/workflows/ci_windows.yml new file mode 100644 index 0000000000000..536463a2c4e43 --- /dev/null +++ b/.github/workflows/ci_windows.yml @@ -0,0 +1,108 @@ +name: CI - Windows + +on: + workflow_call: + inputs: + artifact_group: + type: string + amdgpu_families: + type: string + build_variant_label: + type: string + build_variant_cmake_preset: + type: string + build_variant_suffix: + type: string + test_labels: + type: string + artifact_run_id: + type: string + test_runs_on: + 
+        type: string
+      expect_failure:
+        type: boolean
+      use_prebuilt_artifacts:
+        type: string
+      rocm_package_version:
+        type: string
+      test_type:
+        type: string
+      sanity_check_only_for_family:
+        type: boolean
+
+permissions:
+  contents: read
+
+jobs:
+  build_windows_artifacts:
+    name: Build Artifacts
+    if: ${{ inputs.use_prebuilt_artifacts == 'false' }}
+    uses: ./.github/workflows/build_windows_artifacts.yml
+    secrets: inherit
+    with:
+      artifact_group: ${{ inputs.artifact_group }}
+      amdgpu_families: ${{ inputs.amdgpu_families }}
+      build_variant_label: ${{ inputs.build_variant_label }}
+      build_variant_cmake_preset: ${{ inputs.build_variant_cmake_preset }}
+      build_variant_suffix: ${{ inputs.build_variant_suffix }}
+      package_version: ${{ inputs.rocm_package_version }}
+      expect_failure: ${{ inputs.expect_failure }}
+    permissions:
+      contents: read
+      id-token: write
+
+  # TODO: rework "artifact_run_id" and "use_prebuilt_artifacts" here?
+  # I don't want to copy/paste this condition and special case plumbing
+  # through multiple workflows. All the packaging and testing workflows need
+  # to know is what artifact run id to use. That could be the current
+  # (implicit) run id, or it could be an explicit run id.
+  # How about having the "build artifacts" job run as a passthrough?
+
+  test_windows_artifacts:
+    needs: [build_windows_artifacts]
+    name: Test Artifacts
+    # If the dependent job failed/cancelled, this job will not be run.
+    # The use_prebuilt_artifacts "or" statement ensures that tests will run
+    # whether the previous build step ran or was skipped.
+    # If we are expecting a build failure, do not run tests to save machine capacity.
+    if: >-
+      ${{
+        !failure() &&
+        !cancelled() &&
+        (
+          inputs.use_prebuilt_artifacts == 'false' ||
+          inputs.use_prebuilt_artifacts == 'true'
+        ) &&
+        inputs.expect_failure == false
+      }}
+    uses: ./.github/workflows/test_artifacts.yml
+    with:
+      artifact_group: ${{ inputs.artifact_group }}
+      amdgpu_families: ${{ inputs.amdgpu_families }}
+      test_runs_on: ${{ inputs.test_runs_on }}
+      artifact_run_id: ${{ inputs.artifact_run_id }}
+      test_type: ${{ inputs.test_type }}
+      test_labels: ${{ inputs.test_labels }}
+      sanity_check_only_for_family: ${{ inputs.sanity_check_only_for_family == true }}
+
+  build_windows_python_packages:
+    needs: [build_windows_artifacts]
+    name: Build Python
+    # If the dependent job failed/cancelled, this job will not be run.
+    # The use_prebuilt_artifacts "or" statement ensures that packaging will run
+    # whether the previous build step ran or was skipped.
+    if: >-
+      ${{
+        !failure() &&
+        !cancelled() &&
+        (
+          inputs.use_prebuilt_artifacts == 'false' ||
+          inputs.use_prebuilt_artifacts == 'true'
+        ) &&
+        inputs.expect_failure == false
+      }}
+    uses: ./.github/workflows/build_windows_python_packages.yml
+    with:
+      artifact_run_id: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}"
+      artifact_group: ${{ inputs.artifact_group }}
+      package_version: ${{ inputs.rocm_package_version }}
diff --git a/.github/workflows/copy_release.yml b/.github/workflows/copy_release.yml
new file mode 100644
index 0000000000000..fd4a49dbe4993
--- /dev/null
+++ b/.github/workflows/copy_release.yml
@@ -0,0 +1,101 @@
+name: Copy release to dev bucket
+
+on:
+  workflow_dispatch:
+    inputs:
+      rocm_version:
+        description: ROCm version to copy, e.g.
7.0.0rc20250912 + type: string + amdgpu_family: + type: choice + options: + - gfx101X-dgpu + - gfx103X-dgpu + - gfx110X-all + - gfx1150 + - gfx1151 + - gfx120X-all + - gfx90X-dcgpu + - gfx94X-dcgpu + - gfx950-dcgpu + default: gfx94X-dcgpu + python_version: + type: choice + options: + - 3.11 + - 3.12 + - 3.13 + default: 3.12 + include_torch: + type: boolean + default: false + sourcesubdir: + type: choice + options: + - v2 + - v2-staging + destsubdir: + type: string + default: v2 + sourcebucket: + type: choice + options: + - nightly + - dev + default: nightly + destbucket: + type: choice + options: + - dev + - nightly + default: dev +permissions: + contents: read + +jobs: + copy_python_packages: + name: Copy ${{ inputs.sourcebucket }} ${{ inputs.sourcesubdir }} -> ${{ inputs.destbucket }} ${{ inputs.destsubdir }} | ${{ inputs.amdgpu_family }} | rocm ${{ inputs.rocm_version }} | py ${{ inputs.python_version }} + runs-on: ubuntu-24.04 + permissions: + id-token: write + + steps: + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Install the AWS tool + run: ./dockerfiles/install_awscli.sh + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-${{ inputs.destbucket }}-releases + + - name: Select Python version + run: | + python build_tools/github_actions/python_to_cp_version.py \ + --python-version ${{ inputs.python_version }} + + - name: Copy ROCm packages between S3 buckets + run: | + aws s3 cp \ + s3://therock-${{ inputs.sourcebucket }}-python/${{ inputs.sourcesubdir }}/${{ inputs.amdgpu_family }}/ \ + s3://therock-${{ inputs.destbucket }}-python/${{ inputs.destsubdir }}/${{ inputs.amdgpu_family }}/ \ + --recursive --exclude "*" --include "rocm*${{ inputs.rocm_version }}*" + + - name: Copy torch wheels between S3 buckets + if: ${{ 
inputs.include_torch }} + run: | + aws s3 cp \ + s3://therock-${{ inputs.sourcebucket }}-python/${{ inputs.sourcesubdir }}/${{ inputs.amdgpu_family }}/ \ + s3://therock-${{ inputs.destbucket }}-python/${{ inputs.destsubdir }}/${{ inputs.amdgpu_family }}/ \ + --recursive --exclude "*" --include "*torch*${{ inputs.rocm_version }}*${{ env.cp_version }}*" + + - name: (Re-)Generate Python package release index + env: + S3_BUCKET_PY: "therock-${{ inputs.destbucket }}-python" + CUSTOM_PREFIX: "${{ inputs.destsubdir }}/${{ inputs.amdgpu_family }}" + run: | + pip install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${CUSTOM_PREFIX} diff --git a/.github/workflows/multi_arch_build_portable_linux.yml b/.github/workflows/multi_arch_build_portable_linux.yml new file mode 100644 index 0000000000000..acffe43062f43 --- /dev/null +++ b/.github/workflows/multi_arch_build_portable_linux.yml @@ -0,0 +1,785 @@ +# Multi-Arch Build - Sharded Pipeline for Linux +# +# This workflow builds TheRock in stages: +# 1. foundation (generic) - sysdeps, base +# 2. compiler-runtime (generic) - compiler, runtimes, profiler-core +# 3. math-libs (per-arch) - BLAS, FFT, etc. +# 4. comm-libs (per-arch) - RCCL (parallel to math-libs) +# 5. dctools-core (generic) - RDC (parallel to math-libs) +# 6. profiler-apps (generic) - rocprofiler-systems (parallel to math-libs) +# 7. media (generic) - sysdeps-amd-mesa, rocdecode (todo), rocjpeg (todo) +# +# Artifacts flow between stages via S3 using the artifact_manager.py tool. 
+ +name: Multi-Arch Build (Linux) + +on: + workflow_call: + inputs: + artifact_group: + type: string + matrix_per_family_json: + type: string + description: "JSON array of {amdgpu_family, test-runs-on} objects for per-arch stages" + dist_amdgpu_families: + type: string + description: "Semicolon-separated list of all GPU families for dist targets" + build_variant_label: + type: string + build_variant_cmake_preset: + type: string + build_variant_suffix: + type: string + test_labels: + type: string + artifact_run_id: + type: string + expect_failure: + type: boolean + use_prebuilt_artifacts: + type: string + rocm_package_version: + type: string + test_type: + type: string + +permissions: + contents: read + +env: + CONTAINER_IMAGE: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + CCACHE_CONFIGPATH: ${{ github.workspace }}/.ccache/ccache.conf + CACHE_DIR: ${{ github.workspace }}/.container-cache + TEATIME_FORCE_INTERACTIVE: 0 + +jobs: + # ========================================================================== + # STAGE: foundation (generic) + # ========================================================================== + foundation: + name: Stage - Foundation + # Always run all stages + runs-on: azure-linux-scale-rocm + timeout-minutes: 180 # 3 hours + permissions: + id-token: write + container: + image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + options: -v /runner/config:/home/awsconfig/ + env: + AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini + STAGE_NAME: foundation + steps: + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Install python deps + run: pip install -r requirements.txt + + - name: Adjust git config + run: | + git config --global --add safe.directory $PWD + git config fetch.parallel 10 + + - name: Setup ccache + run: | + 
./build_tools/setup_ccache.py \ + --config-preset "github-oss-presubmit" \ + --dir "$(dirname $CCACHE_CONFIGPATH)" \ + --local-path "$CACHE_DIR/ccache" + + - name: Runner health status + run: | + ./build_tools/health_status.py + + - name: Fetch sources + timeout-minutes: 30 + run: ./build_tools/fetch_sources.py --stage ${STAGE_NAME} --jobs 12 --depth 1 + + - name: Get stage configuration + id: stage_config + run: | + python build_tools/configure_stage.py \ + --stage ${STAGE_NAME} \ + --dist-amdgpu-families "${{ inputs.dist_amdgpu_families }}" \ + --gha-output + + - name: Install stage python deps + if: ${{ steps.stage_config.outputs.pip_install_cmd }} + run: pip install ${{ steps.stage_config.outputs.pip_install_cmd }} + + - name: Configure + run: | + cmake -B build -S . -GNinja \ + -DTHEROCK_PACKAGE_VERSION=${{ inputs.rocm_package_version }} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + ${{ steps.stage_config.outputs.cmake_args }} + + - name: Build stage + run: | + cmake --build build --target stage-${STAGE_NAME} therock-artifacts -- -k 0 + + - name: Report + if: ${{ !cancelled() }} + run: | + echo "CCache Stats:" + ccache -s -v + echo "Artifacts:" + ls -lh build/artifacts/*.tar.xz 2>/dev/null || echo "No artifacts found" + + - name: Configure AWS Credentials + if: ${{ always() && !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Push stage artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py push --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --build-dir build + + # ========================================================================== + # STAGE: compiler-runtime (generic) + # ========================================================================== + 
compiler-runtime: + name: Stage - Compiler Runtime + needs: foundation + runs-on: azure-linux-scale-rocm + timeout-minutes: 480 # 8 hours (compiler is big) + permissions: + id-token: write + container: + image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + options: -v /runner/config:/home/awsconfig/ + env: + AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini + STAGE_NAME: compiler-runtime + steps: + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Install python deps + run: pip install -r requirements.txt + + - name: Adjust git config + run: | + git config --global --add safe.directory $PWD + git config fetch.parallel 10 + + - name: Setup ccache + run: | + ./build_tools/setup_ccache.py \ + --config-preset "github-oss-presubmit" \ + --dir "$(dirname $CCACHE_CONFIGPATH)" \ + --local-path "$CACHE_DIR/ccache" + + - name: Runner health status + run: | + ./build_tools/health_status.py + + - name: Configure AWS Credentials + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Fetch inbound artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py fetch --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --output-dir build \ + --bootstrap + + - name: Fetch sources + timeout-minutes: 30 + run: ./build_tools/fetch_sources.py --stage ${STAGE_NAME} --jobs 12 --depth 1 + + - name: Get stage configuration + id: stage_config + run: | + python build_tools/configure_stage.py \ + --stage ${STAGE_NAME} \ + --dist-amdgpu-families "${{ inputs.dist_amdgpu_families }}" \ + --gha-output + + - name: Install stage python deps + if: ${{ steps.stage_config.outputs.pip_install_cmd }} + 
run: pip install ${{ steps.stage_config.outputs.pip_install_cmd }} + + - name: Configure + run: | + cmake -B build -S . -GNinja \ + -DTHEROCK_PACKAGE_VERSION=${{ inputs.rocm_package_version }} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + ${{ steps.stage_config.outputs.cmake_args }} + + - name: Build stage + run: | + cmake --build build --target stage-${STAGE_NAME} therock-artifacts -- -k 0 + + - name: Report + if: ${{ !cancelled() }} + run: | + echo "CCache Stats:" + ccache -s -v + echo "Artifacts:" + ls -lh build/artifacts/*.tar.xz 2>/dev/null || echo "No artifacts found" + + - name: Configure AWS Credentials (refresh for push) + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Push stage artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py push --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --build-dir build + + # ========================================================================== + # STAGE: math-libs (per-arch) + # ========================================================================== + math-libs: + name: Stage - Math Libs (${{ matrix.family_info.amdgpu_family }}) + needs: compiler-runtime + strategy: + fail-fast: false + matrix: + family_info: ${{ fromJSON(inputs.matrix_per_family_json) }} + runs-on: azure-linux-scale-rocm + timeout-minutes: 480 # 8 hours + permissions: + id-token: write + container: + image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + options: -v /runner/config:/home/awsconfig/ + env: + AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini + STAGE_NAME: math-libs + AMDGPU_FAMILIES: ${{ matrix.family_info.amdgpu_family }} + steps: + - name: 
Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Install python deps + run: pip install -r requirements.txt + + - name: Adjust git config + run: | + git config --global --add safe.directory $PWD + git config fetch.parallel 10 + + - name: Setup ccache + run: | + ./build_tools/setup_ccache.py \ + --config-preset "github-oss-presubmit" \ + --dir "$(dirname $CCACHE_CONFIGPATH)" \ + --local-path "$CACHE_DIR/ccache" + + - name: Runner health status + run: | + ./build_tools/health_status.py + + - name: Configure AWS Credentials + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Fetch inbound artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py fetch --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --amdgpu-families ${{ matrix.family_info.amdgpu_family }} \ + --output-dir build \ + --bootstrap + + - name: Fetch sources + timeout-minutes: 30 + run: ./build_tools/fetch_sources.py --stage ${STAGE_NAME} --jobs 12 --depth 1 + + - name: Get stage configuration + id: stage_config + run: | + python build_tools/configure_stage.py \ + --stage ${STAGE_NAME} \ + --amdgpu-families ${{ matrix.family_info.amdgpu_family }} \ + --dist-amdgpu-families "${{ inputs.dist_amdgpu_families }}" \ + --gha-output + + - name: Install stage python deps + if: ${{ steps.stage_config.outputs.pip_install_cmd }} + run: pip install ${{ steps.stage_config.outputs.pip_install_cmd }} + + - name: Configure + run: | + cmake -B build -S . 
-GNinja \ + -DTHEROCK_PACKAGE_VERSION=${{ inputs.rocm_package_version }} \ + -DTHEROCK_AMDGPU_FAMILIES=${{ matrix.family_info.amdgpu_family }} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + ${{ steps.stage_config.outputs.cmake_args }} + + - name: Build stage + run: | + cmake --build build --target stage-${STAGE_NAME} therock-artifacts -- -k 0 + + - name: Report + if: ${{ !cancelled() }} + run: | + echo "CCache Stats:" + ccache -s -v + echo "Artifacts:" + ls -lh build/artifacts/*.tar.xz 2>/dev/null || echo "No artifacts found" + + - name: Configure AWS Credentials (refresh for push) + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Push stage artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py push --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --amdgpu-families ${{ matrix.family_info.amdgpu_family }} \ + --build-dir build + + # ========================================================================== + # STAGE: comm-libs (per-arch, parallel to math-libs) + # ========================================================================== + comm-libs: + name: Stage - Comm Libs (${{ matrix.family_info.amdgpu_family }}) + needs: compiler-runtime + strategy: + fail-fast: false + matrix: + family_info: ${{ fromJSON(inputs.matrix_per_family_json) }} + runs-on: azure-linux-scale-rocm + timeout-minutes: 240 # 4 hours + permissions: + id-token: write + container: + image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + options: -v /runner/config:/home/awsconfig/ + env: + AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini + STAGE_NAME: comm-libs + AMDGPU_FAMILIES: ${{ 
matrix.family_info.amdgpu_family }} + steps: + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Install python deps + run: pip install -r requirements.txt + + - name: Adjust git config + run: | + git config --global --add safe.directory $PWD + git config fetch.parallel 10 + + - name: Setup ccache + run: | + ./build_tools/setup_ccache.py \ + --config-preset "github-oss-presubmit" \ + --dir "$(dirname $CCACHE_CONFIGPATH)" \ + --local-path "$CACHE_DIR/ccache" + + - name: Runner health status + run: | + ./build_tools/health_status.py + + - name: Configure AWS Credentials + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Fetch inbound artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py fetch --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --amdgpu-families ${{ matrix.family_info.amdgpu_family }} \ + --output-dir build \ + --bootstrap + + - name: Fetch sources + timeout-minutes: 30 + run: ./build_tools/fetch_sources.py --stage ${STAGE_NAME} --jobs 12 --depth 1 + + - name: Get stage configuration + id: stage_config + run: | + python build_tools/configure_stage.py \ + --stage ${STAGE_NAME} \ + --amdgpu-families ${{ matrix.family_info.amdgpu_family }} \ + --dist-amdgpu-families "${{ inputs.dist_amdgpu_families }}" \ + --gha-output + + - name: Install stage python deps + if: ${{ steps.stage_config.outputs.pip_install_cmd }} + run: pip install ${{ steps.stage_config.outputs.pip_install_cmd }} + + - name: Configure + run: | + cmake -B build -S . 
-GNinja \ + -DTHEROCK_PACKAGE_VERSION=${{ inputs.rocm_package_version }} \ + -DTHEROCK_AMDGPU_FAMILIES=${{ matrix.family_info.amdgpu_family }} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + ${{ steps.stage_config.outputs.cmake_args }} + + - name: Build stage + run: | + cmake --build build --target stage-${STAGE_NAME} therock-artifacts -- -k 0 + + - name: Report + if: ${{ !cancelled() }} + run: | + echo "CCache Stats:" + ccache -s -v + echo "Artifacts:" + ls -lh build/artifacts/*.tar.xz 2>/dev/null || echo "No artifacts found" + + - name: Configure AWS Credentials (refresh for push) + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Push stage artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py push --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --amdgpu-families ${{ matrix.family_info.amdgpu_family }} \ + --build-dir build + + # ========================================================================== + # STAGE: dctools-core (generic, parallel to math-libs) + # ========================================================================== + dctools-core: + name: Stage - DC Tools Core + needs: compiler-runtime + runs-on: azure-linux-scale-rocm + timeout-minutes: 120 # 2 hours + permissions: + id-token: write + container: + image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + options: -v /runner/config:/home/awsconfig/ + env: + AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini + STAGE_NAME: dctools-core + steps: + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Install python deps + run: pip install -r 
requirements.txt + + - name: Adjust git config + run: | + git config --global --add safe.directory $PWD + git config fetch.parallel 10 + + - name: Setup ccache + run: | + ./build_tools/setup_ccache.py \ + --config-preset "github-oss-presubmit" \ + --dir "$(dirname $CCACHE_CONFIGPATH)" \ + --local-path "$CACHE_DIR/ccache" + + - name: Runner health status + run: | + ./build_tools/health_status.py + + - name: Configure AWS Credentials + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Fetch inbound artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py fetch --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --output-dir build \ + --bootstrap + + - name: Fetch sources + timeout-minutes: 30 + run: ./build_tools/fetch_sources.py --stage ${STAGE_NAME} --jobs 12 --depth 1 + + - name: Get stage configuration + id: stage_config + run: | + python build_tools/configure_stage.py \ + --stage ${STAGE_NAME} \ + --dist-amdgpu-families "${{ inputs.dist_amdgpu_families }}" \ + --gha-output + + - name: Install stage python deps + if: ${{ steps.stage_config.outputs.pip_install_cmd }} + run: pip install ${{ steps.stage_config.outputs.pip_install_cmd }} + + - name: Configure + run: | + cmake -B build -S . 
-GNinja \ + -DTHEROCK_PACKAGE_VERSION=${{ inputs.rocm_package_version }} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + ${{ steps.stage_config.outputs.cmake_args }} + + - name: Build stage + run: | + cmake --build build --target stage-${STAGE_NAME} therock-artifacts -- -k 0 + + - name: Report + if: ${{ !cancelled() }} + run: | + echo "CCache Stats:" + ccache -s -v + echo "Artifacts:" + ls -lh build/artifacts/*.tar.xz 2>/dev/null || echo "No artifacts found" + + - name: Configure AWS Credentials (refresh for push) + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Push stage artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py push --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --build-dir build + + # ========================================================================== + # STAGE: profiler-apps (generic, parallel to math-libs) + # ========================================================================== + profiler-apps: + name: Stage - Profiler Apps + needs: compiler-runtime + runs-on: azure-linux-scale-rocm + timeout-minutes: 180 # 3 hours + permissions: + id-token: write + container: + image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + options: -v /runner/config:/home/awsconfig/ + env: + AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini + STAGE_NAME: profiler-apps + steps: + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Install python deps + run: pip install -r requirements.txt + + - name: Adjust git config + run: | + git config --global --add safe.directory $PWD + git config fetch.parallel 10 + + 
- name: Setup ccache + run: | + ./build_tools/setup_ccache.py \ + --config-preset "github-oss-presubmit" \ + --dir "$(dirname $CCACHE_CONFIGPATH)" \ + --local-path "$CACHE_DIR/ccache" + + - name: Runner health status + run: | + ./build_tools/health_status.py + + - name: Configure AWS Credentials + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Fetch inbound artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py fetch --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --output-dir build \ + --bootstrap + + - name: Fetch sources + timeout-minutes: 30 + run: ./build_tools/fetch_sources.py --stage ${STAGE_NAME} --jobs 12 --depth 1 + + - name: Get stage configuration + id: stage_config + run: | + python build_tools/configure_stage.py \ + --stage ${STAGE_NAME} \ + --dist-amdgpu-families "${{ inputs.dist_amdgpu_families }}" \ + --gha-output + + - name: Install stage python deps + if: ${{ steps.stage_config.outputs.pip_install_cmd }} + run: pip install ${{ steps.stage_config.outputs.pip_install_cmd }} + + - name: Configure + run: | + cmake -B build -S . 
-GNinja \ + -DTHEROCK_PACKAGE_VERSION=${{ inputs.rocm_package_version }} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + ${{ steps.stage_config.outputs.cmake_args }} + + - name: Build stage + run: | + cmake --build build --target stage-${STAGE_NAME} therock-artifacts -- -k 0 + + - name: Report + if: ${{ !cancelled() }} + run: | + echo "CCache Stats:" + ccache -s -v + echo "Artifacts:" + ls -lh build/artifacts/*.tar.xz 2>/dev/null || echo "No artifacts found" + + - name: Configure AWS Credentials (refresh for push) + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Push stage artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py push --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --build-dir build + + # ========================================================================== + # STAGE: media (generic) + # ========================================================================== + media: + name: Stage - Media + needs: foundation + runs-on: azure-linux-scale-rocm + timeout-minutes: 180 # 3 hours + permissions: + id-token: write + container: + image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + options: -v /runner/config:/home/awsconfig/ + env: + AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini + STAGE_NAME: media + steps: + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Install python deps + run: pip install -r requirements.txt + + - name: Adjust git config + run: | + git config --global --add safe.directory $PWD + git config fetch.parallel 10 + + - name: Setup ccache + run: | + 
./build_tools/setup_ccache.py \ + --config-preset "github-oss-presubmit" \ + --dir "$(dirname $CCACHE_CONFIGPATH)" \ + --local-path "$CACHE_DIR/ccache" + + - name: Runner health status + run: | + ./build_tools/health_status.py + + - name: Configure AWS Credentials + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Fetch inbound artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py fetch --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --output-dir build \ + --bootstrap + + - name: Fetch sources + timeout-minutes: 30 + run: ./build_tools/fetch_sources.py --stage ${STAGE_NAME} --jobs 12 --depth 1 + + - name: Get stage configuration + id: stage_config + run: | + python build_tools/configure_stage.py \ + --stage ${STAGE_NAME} \ + --dist-amdgpu-families "${{ inputs.dist_amdgpu_families }}" \ + --gha-output + + - name: Install stage python deps + if: ${{ steps.stage_config.outputs.pip_install_cmd }} + run: pip install ${{ steps.stage_config.outputs.pip_install_cmd }} + + - name: Configure + run: | + cmake -B build -S . 
-GNinja \ + -DTHEROCK_PACKAGE_VERSION=${{ inputs.rocm_package_version }} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + ${{ steps.stage_config.outputs.cmake_args }} + + - name: Build stage + run: | + cmake --build build --target stage-${STAGE_NAME} therock-artifacts -- -k 0 + + - name: Report + if: ${{ !cancelled() }} + run: | + echo "CCache Stats:" + ccache -s -v + echo "Artifacts:" + ls -lh build/artifacts/*.tar.xz 2>/dev/null || echo "No artifacts found" + + - name: Configure AWS Credentials + if: ${{ !github.event.pull_request.head.repo.fork }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-ci + + - name: Push stage artifacts + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + python build_tools/artifact_manager.py push --run-id ${{ github.run_id }} \ + --stage ${STAGE_NAME} \ + --build-dir build diff --git a/.github/workflows/multi_arch_ci.yml b/.github/workflows/multi_arch_ci.yml new file mode 100644 index 0000000000000..73a6a74b9df2c --- /dev/null +++ b/.github/workflows/multi_arch_ci.yml @@ -0,0 +1,142 @@ +# Multi-Arch CI +# +# This is a staging workflow for the sharded multi-arch build pipeline. +# It mirrors ci.yml but uses multi_arch_build_portable_linux.yml instead of +# ci_linux.yml. Once validated, ci.yml will be updated to use the multi-arch +# sub-workflows directly. + +name: Multi-Arch CI + +on: + push: + branches: + # While we are iterating on testing. + - 'multi_arch/**' + workflow_dispatch: + inputs: + linux_amdgpu_families: + type: string + description: "Insert comma-separated list of Linux GPU families to build and test. ex: gfx94X, gfx1201X" + default: "" + linux_test_labels: + type: string + description: "If enabled, reduce test set on Linux to the list of labels prefixed with 'test:'. 
ex: test:rocprim, test:hipcub" + default: "" + linux_use_prebuilt_artifacts: + type: boolean + description: "If enabled, the CI will pull Linux artifacts using artifact_run_id and only run tests" + windows_amdgpu_families: + type: string + description: "Insert comma-separated list of Windows GPU families to build and test. ex: gfx94X, gfx1201X" + default: "" + windows_test_labels: + type: string + description: "If enabled, reduce test set on Windows to the list of labels prefixed with 'test:' ex: test:rocprim, test:hipcub" + default: "" + windows_use_prebuilt_artifacts: + type: boolean + description: "If enabled, the CI will pull Windows artifacts using artifact_run_id and only run tests" + artifact_run_id: + type: string + description: "If provided, the tests will run on this artifact ID" + default: "" + # pull_request: + # types: + # - labeled + # - opened + # - synchronize + +permissions: + contents: read + +concurrency: + # A PR number if a pull request and otherwise the commit hash. This cancels + # queued and in-progress runs for the same PR (presubmit) or commit + # (postsubmit). The workflow name is prepended to avoid conflicts between + # different workflows. 
+ group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + setup: + uses: ./.github/workflows/setup.yml + with: + build_variant: "release" + multi_arch: true + + linux_build_and_test: + name: Linux::${{ matrix.variant.build_variant_label }} + needs: setup + if: >- + ${{ + needs.setup.outputs.linux_variants != '[]' && + needs.setup.outputs.enable_build_jobs == 'true' + }} + strategy: + fail-fast: false + matrix: + variant: ${{ fromJSON(needs.setup.outputs.linux_variants) }} + uses: ./.github/workflows/multi_arch_build_portable_linux.yml + secrets: inherit + with: + matrix_per_family_json: ${{ matrix.variant.matrix_per_family_json }} + dist_amdgpu_families: ${{ matrix.variant.dist_amdgpu_families }} + artifact_group: ${{ matrix.variant.artifact_group }} + build_variant_label: ${{ matrix.variant.build_variant_label }} + build_variant_suffix: ${{ matrix.variant.build_variant_suffix }} + build_variant_cmake_preset: ${{ matrix.variant.build_variant_cmake_preset }} + test_labels: ${{ needs.setup.outputs.linux_test_labels }} + artifact_run_id: ${{ inputs.artifact_run_id }} + expect_failure: ${{ matrix.variant.expect_failure == true }} + use_prebuilt_artifacts: ${{ inputs.linux_use_prebuilt_artifacts == true && 'true' || 'false' }} + rocm_package_version: ${{ needs.setup.outputs.rocm_package_version }} + test_type: ${{ needs.setup.outputs.test_type }} + permissions: + contents: read + id-token: write + + # TODO: Add windows_build_and_test when ready + # windows_build_and_test: + # name: Windows::${{ matrix.variant.family }}::${{ matrix.variant.build_variant_label }} + # needs: setup + # if: >- + # ${{ + # needs.setup.outputs.windows_variants != '[]' && + # needs.setup.outputs.enable_build_jobs == 'true' + # }} + # strategy: + # fail-fast: false + # matrix: + # variant: ${{ fromJSON(needs.setup.outputs.windows_variants) }} + # uses: ./.github/workflows/ci_windows.yml + # ... 
+ + ci_summary: + name: CI Summary + if: always() + needs: + - setup + - linux_build_and_test + runs-on: ubuntu-24.04 + steps: + - name: Output failed jobs + run: | + # Build a list of failed jobs, but ignore those marked continue-on-error + FAILED_JOBS="$(echo '${{ toJson(needs) }}' \ + | jq --raw-output ' + to_entries + | map(select( + (.value.result != "success" and .value.result != "skipped") + and (.value.outputs.continue_on_error | not) + )) + | map(.key) + | join(",") + ' \ + )" + + if [[ -n "${FAILED_JOBS}" ]]; then + echo "The following jobs failed: ${FAILED_JOBS}" + exit 1 + else + echo "All required jobs succeeded (continue-on-error jobs ignored)." + fi diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000000000..a129cad3f0c1a --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,21 @@ +name: pre-commit + +on: + pull_request: + push: + branches: [main] + +permissions: + contents: read + +jobs: + pre-commit: + runs-on: ubuntu-24.04 + steps: + - name: Checkout TheRock repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + repository: "ROCm/TheRock" + fetch-depth: 10 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 diff --git a/.github/workflows/publish_build_manylinux_rccl_x86_64.yml b/.github/workflows/publish_build_manylinux_rccl_x86_64.yml new file mode 100644 index 0000000000000..5e9c22824da45 --- /dev/null +++ b/.github/workflows/publish_build_manylinux_rccl_x86_64.yml @@ -0,0 +1,21 @@ +name: Publish build_manylinux_rccl_x86_64 images +on: + workflow_dispatch: + push: + branches: + - 'main' + - 'stage/docker/**' + paths: + - dockerfiles/build_manylinux_rccl_x86_64*.Dockerfile + - .github/workflows/publish_build_manylinux_rccl_x86_64.yml + +permissions: + contents: read + packages: write + +jobs: + publish_build_manylinux_x86_64: + 
uses: ./.github/workflows/publish_dockerfile.yml + with: + DOCKER_FILE_NAME: build_manylinux_rccl_x86_64 + DOCKER_IMAGE_NAME: therock_build_manylinux_rccl_x86_64 diff --git a/.github/workflows/publish_build_manylinux_x86_64.yml b/.github/workflows/publish_build_manylinux_x86_64.yml new file mode 100644 index 0000000000000..4501d1fe776db --- /dev/null +++ b/.github/workflows/publish_build_manylinux_x86_64.yml @@ -0,0 +1,21 @@ +name: Publish build_manylinux_x86_64 images +on: + workflow_dispatch: + push: + branches: + - 'main' + - 'stage/docker/**' + paths: + - dockerfiles/build_manylinux_x86_64*.Dockerfile + - .github/workflows/publish_build_manylinux_x86_64.yml + +permissions: + contents: read + packages: write + +jobs: + publish_build_manylinux_x86_64: + uses: ./.github/workflows/publish_dockerfile.yml + with: + DOCKER_FILE_NAME: build_manylinux_x86_64 + DOCKER_IMAGE_NAME: therock_build_manylinux_x86_64 diff --git a/.github/workflows/publish_dockerfile.yml b/.github/workflows/publish_dockerfile.yml new file mode 100644 index 0000000000000..bb725e88a8cd0 --- /dev/null +++ b/.github/workflows/publish_dockerfile.yml @@ -0,0 +1,70 @@ +name: Publish TheRock Docker image +on: + workflow_call: + inputs: + DOCKER_FILE_NAME: + type: string + DOCKER_IMAGE_NAME: + type: string + +jobs: + build-and-push-image: + runs-on: ubuntu-24.04 + env: + REGISTRY: ghcr.io + IMAGE_NAME: ROCm/${{ inputs.DOCKER_IMAGE_NAME }} + # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. 
+ permissions: + contents: read + packages: write + steps: + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Log in to the Container registry + uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Sanitization of tag names is done automatically by the metadata-action + - name: Determine Docker tag + id: tag + run: | + ref="${{ github.ref_name }}" + if [[ "$ref" == stage/docker/* ]]; then + suffix="${ref#stage/docker/}" + echo "TAG_SUFFIX=stage-${suffix}" >> "$GITHUB_OUTPUT" + elif [[ "$ref" == "main" ]]; then + echo "TAG_SUFFIX=latest" >> "$GITHUB_OUTPUT" + else + echo "TAG_SUFFIX=${ref}" >> "$GITHUB_OUTPUT" + fi + + # Adds extra tags to the image, with the default tags from https://github.com/docker/metadata-action#tags-input + # The custom tag is for the branches prefixed with `stage/docker/`. + # For the default branch (i.e., main), the default behaviour remains and is labelled `latest`. + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # v5.10.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=schedule + type=ref,event=branch,enable={{is_default_branch}} + type=ref,event=tag + type=ref,event=pr + type=raw,value=${{ steps.tag.outputs.TAG_SUFFIX }} + + # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. + # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. 
+ # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. + - name: Build and push Docker image + uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 + with: + context: dockerfiles/ + file: dockerfiles/${{ inputs.DOCKER_FILE_NAME }}.Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/publish_no_rocm_image_ubuntu24_04.yml b/.github/workflows/publish_no_rocm_image_ubuntu24_04.yml new file mode 100644 index 0000000000000..ca562fc899e62 --- /dev/null +++ b/.github/workflows/publish_no_rocm_image_ubuntu24_04.yml @@ -0,0 +1,21 @@ +name: Publish no_rocm_image_ubuntu24_04 images +on: + workflow_dispatch: + push: + branches: + - 'main' + - 'stage/docker/**' + paths: + - dockerfiles/no_rocm_image_ubuntu24_04*.Dockerfile + - .github/workflows/publish_no_rocm_image_ubuntu24_04.yml + +permissions: + contents: read + packages: write + +jobs: + publish_no_rocm_image_ubuntu24_04: + uses: ./.github/workflows/publish_dockerfile.yml + with: + DOCKER_FILE_NAME: no_rocm_image_ubuntu24_04 + DOCKER_IMAGE_NAME: no_rocm_image_ubuntu24_04 diff --git a/.github/workflows/release_native_linux_packages.yml b/.github/workflows/release_native_linux_packages.yml new file mode 100644 index 0000000000000..50e4dd2972797 --- /dev/null +++ b/.github/workflows/release_native_linux_packages.yml @@ -0,0 +1,67 @@ +name: Release native Linux Packages + +on: + workflow_call: + inputs: + amdgpu_family: + description: gfx arch for creating the s3 bucket url + required: true + type: string + artifact_run_id: + description: workflow run id to download the artifacts from + type: string + rocm_version: + description: ROCm version to append to the package (like 8.0.0 or 8.1.0). + required: true + type: string + package_type: + description: Specify whether debian or rpm packages are needed (deb or rpm). 
+ required: true + type: string + package_suffix: + description: The suffix to be added to package name (build_no or master or rc or combination). + required: true + type: string + workflow_dispatch: + inputs: + amdgpu_family: + type: string + default: gfx94X-dcgpu + artifact_run_id: + description: workflow run id to download the artifacts from + type: string + rocm_version: + description: ROCm version to append to the package (like 7.0.0 or 7.1.0) + type: string + default: "0.0.1" + package_type: + description: Specify whether debian or rpm packages are needed (deb or rpm). + required: true + type: choice + options: + - rpm + - deb + default: "rpm" + package_suffix: + description: The suffix to be added to package name (build_no or master or rc or combination). + type: string + default: "test" + +permissions: + id-token: write + contents: read + +run-name: Release native Linux packages (${{ inputs.amdgpu_family }}, ${{ inputs.rocm_version }}, ${{ inputs.package_type }}, ${{ inputs.package_suffix }}) + +jobs: + release: + name: Release Native Linux Package + strategy: + fail-fast: false + uses: ./.github/workflows/build_native_linux_packages.yml + with: + artifact_group: ${{ inputs.amdgpu_family }} + artifact_run_id: ${{ inputs.artifact_run_id }} + rocm_version: ${{ inputs.rocm_version }} + native_package_type: ${{ inputs.package_type }} + package_suffix: ${{ inputs.package_suffix }} diff --git a/.github/workflows/release_portable_linux_packages.yml b/.github/workflows/release_portable_linux_packages.yml new file mode 100644 index 0000000000000..133f7403de1d1 --- /dev/null +++ b/.github/workflows/release_portable_linux_packages.yml @@ -0,0 +1,380 @@ +name: Release portable Linux packages + +on: + # Trigger from another workflow (typically to build dev packages and then test them) + workflow_call: + inputs: + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! 
+ type: string + default: "dev" + package_suffix: + type: string + s3_subdir: + description: "Subdirectory to push the packages" + type: string + default: "v2" + s3_staging_subdir: + description: "Staging subdirectory to push the packages" + type: string + default: "v2-staging" + families: + description: "Comma separated list of AMD GPU families, e.g. `gfx94X,gfx103x`" + type: string + prerelease_version: + description: "(Optional) Number of the prerelease" + type: string + repository: + description: "Repository to checkout. Otherwise, defaults to `github.repository`." + type: string + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." + type: string + # Trigger manually (typically to test the workflow or manually build a release [candidate]) + workflow_dispatch: + inputs: + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + type: string + default: "dev" + package_suffix: + type: string + s3_subdir: + description: "Subdirectory to push the packages" + type: string + default: "v2" + s3_staging_subdir: + description: "Staging subdirectory to push the packages" + type: string + default: "v2-staging" + families: + description: "Comma separated list of AMD GPU families, e.g. `gfx94X,gfx103x`" + type: string + prerelease_version: + description: "(Optional) Number of the prerelease" + type: string + # Trigger on a schedule to build nightly release candidates. 
+ schedule: + # Runs at 04:00 AM UTC, which is 8:00 PM PST (UTC-8) + - cron: '0 04 * * *' + +permissions: + contents: read + +run-name: Release portable Linux packages (${{ inputs.families || 'default' }}, ${{ inputs.release_type || 'nightly' }}) + +jobs: + setup_metadata: + if: ${{ github.repository_owner == 'ROCm' || github.event_name != 'schedule' }} + runs-on: ubuntu-24.04 + env: + release_type: ${{ inputs.release_type || 'nightly' }} + outputs: + version: ${{ steps.rocm_package_version.outputs.rocm_package_version }} + rpm_version: ${{ steps.rocm_native_package_version.outputs.rocm_rpm_package_version }} + deb_version: ${{ steps.rocm_native_package_version.outputs.rocm_deb_package_version }} + release_type: ${{ env.release_type }} + package_targets: ${{ steps.configure.outputs.package_targets }} + cloudfront_url: ${{ steps.release_information.outputs.cloudfront_url }} + cloudfront_staging_url: ${{ steps.release_information.outputs.cloudfront_staging_url }} + s3_subdir_tar: ${{ steps.release_information.outputs.s3_subdir_tar }} + steps: + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Setup Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: 3.12 + + - name: Compute package version + id: rocm_package_version + run: | + python ./build_tools/compute_rocm_package_version.py \ + --release-type=${{ env.release_type }} \ + --prerelease-version=${{ inputs.prerelease_version }} + + - name: Compute native package version + id: rocm_native_package_version + run: | + # Compute rpm package version + # This sets the 'rocm_rpm_package_version' output + python ./build_tools/compute_rocm_package_version.py \ + --release-type=${{ env.release_type }} \ + --prerelease-version=${{ inputs.prerelease_version }} \ + --package-type="rpm" + # Compute debian 
package version + # This sets the 'rocm_deb_package_version' output + python ./build_tools/compute_rocm_package_version.py \ + --release-type=${{ env.release_type }} \ + --prerelease-version=${{ inputs.prerelease_version }} \ + --package-type="deb" + + - name: Set variables for nightly release + if: ${{ env.release_type == 'nightly' }} + run: | + echo "tmp_cloudfront_url=https://rocm.nightlies.amd.com/v2" >> $GITHUB_ENV + echo "tmp_cloudfront_staging_url=https://rocm.nightlies.amd.com/v2-staging" >> $GITHUB_ENV + echo "tmp_s3_subdir_tar=''" >> $GITHUB_ENV + + - name: Set variables for prerelease + if: ${{ env.release_type == 'prerelease' }} + run: | + echo "tmp_cloudfront_url=https://rocm.prereleases.amd.com/whl" >> $GITHUB_ENV + echo "tmp_cloudfront_staging_url=https://rocm.prereleases.amd.com/whl-staging" >> $GITHUB_ENV + echo "tmp_s3_subdir_tar=v3/tarball/" >> $GITHUB_ENV + + - name: Set variables for development release + if: ${{ env.release_type == 'dev' }} + run: | + echo "tmp_cloudfront_url=https://rocm.devreleases.amd.com/v2" >> $GITHUB_ENV + echo "tmp_cloudfront_staging_url=https://rocm.devreleases.amd.com/v2-staging" >> $GITHUB_ENV + echo "tmp_s3_subdir_tar=''" >> $GITHUB_ENV + + - name: Generate release information + id: release_information + run: | + echo "cloudfront_url=${tmp_cloudfront_url}" >> $GITHUB_OUTPUT + echo "cloudfront_staging_url=${tmp_cloudfront_staging_url}" >> $GITHUB_OUTPUT + echo "s3_subdir_tar=${tmp_s3_subdir_tar}" >> $GITHUB_OUTPUT + + - name: Generating package target matrix + id: configure + env: + AMDGPU_FAMILIES: ${{ inputs.families }} + THEROCK_PACKAGE_PLATFORM: "linux" + # Variable comes from ROCm organization variable 'ROCM_THEROCK_TEST_RUNNERS' + ROCM_THEROCK_TEST_RUNNERS: ${{ vars.ROCM_THEROCK_TEST_RUNNERS }} + LOAD_TEST_RUNNERS_FROM_VAR: false + run: python ./build_tools/github_actions/fetch_package_targets.py + + portable_linux_packages: + name: ${{ matrix.target_bundle.amdgpu_family }}::Build Portable Linux + runs-on: ${{ 
github.repository_owner == 'ROCm' && 'azure-linux-scale-rocm' || 'ubuntu-24.04' }} + continue-on-error: ${{ matrix.target_bundle.expect_failure == true }} # for GPU families that are flaky, we mark as xfail + timeout-minutes: 720 # 12 hour timeout + needs: [setup_metadata] + permissions: + contents: write + actions: write # Added permission to trigger workflows + id-token: write # Added permission for AWS S3 upload + strategy: + fail-fast: false + matrix: + target_bundle: ${{ fromJSON(needs.setup_metadata.outputs.package_targets) }} + env: + TEATIME_LABEL_GH_GROUP: 1 + OUTPUT_DIR: ${{ github.workspace }}/output + BUILD_IMAGE: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:583d473f263a289222c48d4b493e2956b2354a45796f09dee6f2c8ecd4504ab6 + DIST_ARCHIVE: "${{ github.workspace }}/output/therock-dist-linux-${{ matrix.target_bundle.amdgpu_family }}${{ inputs.package_suffix }}-${{ needs.setup_metadata.outputs.version }}.tar.gz" + FILE_NAME: "therock-dist-linux-${{ matrix.target_bundle.amdgpu_family }}${{ inputs.package_suffix }}-${{ needs.setup_metadata.outputs.version }}.tar.gz" + RELEASE_TYPE: "${{ needs.setup_metadata.outputs.release_type }}" + S3_BUCKET_TAR: "therock-${{ needs.setup_metadata.outputs.release_type }}-tarball" + S3_SUBDIR_TAR: ${{ needs.setup_metadata.outputs.s3_subdir_tar }} + S3_BUCKET_PY: "therock-${{ needs.setup_metadata.outputs.release_type }}-python" + S3_SUBDIR: ${{ inputs.s3_subdir || 'v2' }} + S3_STAGING_SUBDIR: ${{ inputs.s3_staging_subdir || 'v2-staging' }} + MANYLINUX: 1 + + steps: + - name: "Checking out repository" + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: 3.12 + + # TODO: We shouldn't be using a cache on actual release branches, but it + # really helps for iteration time. 
+ - name: Enable cache + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: ${{ env.OUTPUT_DIR }}/caches + key: portable-linux-package-matrix-v1-${{ matrix.target_bundle.amdgpu_family }}-${{ github.sha }} + restore-keys: | + portable-linux-package-matrix-v1-${{ matrix.target_bundle.amdgpu_family }}- + + - name: Install the AWS tool + run: ./dockerfiles/install_awscli.sh + + - name: Fetch sources + timeout-minutes: 30 + run: | + # Prefetch docker container in background. + docker pull ${{ env.BUILD_IMAGE }} & + ./build_tools/fetch_sources.py --jobs 10 + wait + + - name: Build Projects + run: | + ./build_tools/linux_portable_build.py \ + --image=${{ env.BUILD_IMAGE }} \ + --output-dir=${{ env.OUTPUT_DIR }} \ + -- \ + "-DTHEROCK_AMDGPU_FAMILIES=${{ matrix.target_bundle.amdgpu_family }}" + cd ${{ env.OUTPUT_DIR }}/build/dist/rocm + echo "Building ${{ env.DIST_ARCHIVE }}" + tar cfz "${{ env.DIST_ARCHIVE }}" . + + - name: Build Python Packages + run: | + ./build_tools/linux_portable_build.py \ + --image=${{ env.BUILD_IMAGE }} \ + --output-dir=${{ env.OUTPUT_DIR }}/packages \ + --build-python-only \ + --artifact-dir=${{ env.OUTPUT_DIR }}/build/artifacts \ + -- \ + "--version=${{ needs.setup_metadata.outputs.version }}" + + - name: Grant ownership over output directory + if: ${{ !cancelled() }} + run: | + sudo chown -R $(whoami) ${{ env.OUTPUT_DIR }} + + - name: Build Report + if: ${{ !cancelled() }} + run: | + echo "Full SDK du:" + echo "------------" + du -h -d 1 ${{ env.OUTPUT_DIR }}/build/dist/rocm + + # Analyze ninja build log to generate per-component timing report + - name: Analyze Build Times + if: ${{ !cancelled() }} + run: | + python3 build_tools/analyze_build_times.py --build-dir ${{ env.OUTPUT_DIR }}/build + + - name: Configure AWS Credentials + if: ${{ github.repository_owner == 'ROCm' && !cancelled() }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + 
aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-${{ env.RELEASE_TYPE }} + + - name: Post Build Upload + if: ${{ github.repository_owner == 'ROCm' && !cancelled() }} + run: | + python3 build_tools/github_actions/post_build_upload.py \ + --run-id ${{ github.run_id }} \ + --artifact-group "${{ matrix.target_bundle.amdgpu_family }}" \ + --build-dir ${{ env.OUTPUT_DIR }}/build \ + --upload \ + --job-status ${{ job.status }} + + - name: Upload Releases to staging S3 + if: ${{ github.repository_owner == 'ROCm' }} + run: | + aws s3 cp ${{ env.OUTPUT_DIR }}/packages/dist/ s3://${{ env.S3_BUCKET_PY }}/${{ env.S3_STAGING_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}/ \ + --recursive --no-follow-symlinks \ + --exclude "*" \ + --include "*.whl" \ + --include "*.tar.gz" + + - name: (Re-)Generate Python package release index for staging + if: ${{ github.repository_owner == 'ROCm' }} + env: + # Environment variable to be set for `manage.py` + CUSTOM_PREFIX: "${{ env.S3_STAGING_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}" + run: | + pip install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${{ env.CUSTOM_PREFIX }} + + ## TODO: Restrict uploading to the non-staging S3 directory until ROCm sanity checks and all validation tests have successfully passed. 
+ - name: Upload Releases to S3 + if: ${{ github.repository_owner == 'ROCm' }} + run: | + aws s3 cp ${{ env.DIST_ARCHIVE }} s3://${{ env.S3_BUCKET_TAR }}/${{ env.S3_SUBDIR_TAR }} + aws s3 cp ${{ env.OUTPUT_DIR }}/packages/dist/ s3://${{ env.S3_BUCKET_PY }}/${{ env.S3_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}/ \ + --recursive --no-follow-symlinks \ + --exclude "*" \ + --include "*.whl" \ + --include "*.tar.gz" + + - name: (Re-)Generate release index pages + if: ${{ github.repository_owner == 'ROCm' }} + env: + # Environment variable to be set for `manage.py` + CUSTOM_PREFIX: "${{ env.S3_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}" + run: | + pip install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${{ env.CUSTOM_PREFIX }} + python ./build_tools/index_generation_s3_tar.py \ + --bucket ${{ env.S3_BUCKET_TAR }} \ + --directory ${{ env.S3_SUBDIR_TAR }} \ + --upload + + - name: Trigger building PyTorch wheels + if: ${{ github.repository_owner == 'ROCm' && matrix.target_bundle.expect_pytorch_failure == false }} + uses: benc-uk/workflow-dispatch@e2e5e9a103e331dad343f381a29e654aea3cf8fc # v1.2.4 + with: + workflow: release_portable_linux_pytorch_wheels.yml + inputs: | + { "amdgpu_family": "${{ matrix.target_bundle.amdgpu_family }}", + "release_type": "${{ env.RELEASE_TYPE }}", + "s3_subdir": "${{ env.S3_SUBDIR }}", + "s3_staging_subdir": "${{ env.S3_STAGING_SUBDIR }}", + "cloudfront_url": "${{ needs.setup_metadata.outputs.cloudfront_url }}", + "cloudfront_staging_url": "${{ needs.setup_metadata.outputs.cloudfront_staging_url }}", + "rocm_version": "${{ needs.setup_metadata.outputs.version }}", + "ref": "${{ inputs.ref || '' }}" + } + + - name: URL-encode .tar URL + # TODO: Enable JAX wheels for prereleases + if: ${{ env.RELEASE_TYPE != 'prerelease' }} + id: url-encode-tar + run: python -c "from urllib.parse import quote; print('tar_url=https://therock-${{ env.RELEASE_TYPE }}-tarball.s3.amazonaws.com/' + 
quote('therock-dist-linux-${{ matrix.target_bundle.amdgpu_family }}${{ inputs.package_suffix }}-${{ needs.setup_metadata.outputs.version }}.tar.gz'))" >> ${GITHUB_OUTPUT} + + - name: Trigger build JAX wheels + # TODO: Enable JAX wheels for prereleases + if: ${{ env.RELEASE_TYPE != 'prerelease' && github.repository_owner == 'ROCm' }} + uses: benc-uk/workflow-dispatch@e2e5e9a103e331dad343f381a29e654aea3cf8fc # v1.2.4 + with: + workflow: build_linux_jax_wheels.yml + inputs: | + { "amdgpu_family": "${{ matrix.target_bundle.amdgpu_family }}", + "python_version": "3.12", + "release_type": "${{ env.RELEASE_TYPE }}", + "s3_subdir": "${{ env.S3_STAGING_SUBDIR }}", + "rocm_version": "${{ needs.setup_metadata.outputs.version }}", + "tar_url": "${{ steps.url-encode-tar.outputs.tar_url }}" + } + + - name: Trigger build native rpm package + if: ${{ github.repository_owner == 'ROCm' }} + uses: benc-uk/workflow-dispatch@e2e5e9a103e331dad343f381a29e654aea3cf8fc # v1.2.4 + with: + workflow: build_native_linux_packages.yml + inputs: | + { "artifact_group": "${{ matrix.target_bundle.amdgpu_family }}", + "rocm_version": "${{ needs.setup_metadata.outputs.rpm_version }}", + "release_type": "${{ env.RELEASE_TYPE }}", + "artifact_run_id": "${{ github.run_id }}", + "native_package_type": "rpm" + } + + - name: Trigger build native debian package + if: ${{ github.repository_owner == 'ROCm' }} + uses: benc-uk/workflow-dispatch@e2e5e9a103e331dad343f381a29e654aea3cf8fc # v1.2.4 + with: + workflow: build_native_linux_packages.yml + inputs: | + { "artifact_group": "${{ matrix.target_bundle.amdgpu_family }}", + "rocm_version": "${{ needs.setup_metadata.outputs.deb_version }}", + "release_type": "${{ env.RELEASE_TYPE }}", + "artifact_run_id": "${{ github.run_id }}", + "native_package_type": "deb" + } + + - name: Save cache + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + if: ${{ !cancelled() }} + with: + path: ${{ env.OUTPUT_DIR }}/caches + key: 
portable-linux-package-matrix-v1-${{ matrix.target_bundle.amdgpu_family }}-${{ github.sha }} diff --git a/.github/workflows/release_portable_linux_pytorch_wheels.yml b/.github/workflows/release_portable_linux_pytorch_wheels.yml new file mode 100644 index 0000000000000..87b52de133899 --- /dev/null +++ b/.github/workflows/release_portable_linux_pytorch_wheels.yml @@ -0,0 +1,114 @@ +name: Release portable Linux PyTorch Wheels + +on: + workflow_call: + inputs: + amdgpu_family: + required: true + type: string + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + type: string + default: "dev" + s3_subdir: + description: S3 subdirectory, not including the GPU-family + type: string + default: "v2" + s3_staging_subdir: + description: Staging subdirectory to push the wheels for test + type: string + default: "v2-staging" + cloudfront_url: + description: CloudFront URL pointing to Python index + required: true + type: string + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + required: true + type: string + rocm_version: + description: ROCm version to pip install (e.g. "7.10.0a20251124") + type: string + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." + type: string + workflow_dispatch: + inputs: + amdgpu_family: + type: choice + options: + - gfx101X-dgpu + - gfx103X-dgpu + - gfx110X-all + - gfx1150 + - gfx1151 + - gfx120X-all + - gfx90X-dcgpu + - gfx94X-dcgpu + - gfx950-dcgpu + default: gfx94X-dcgpu + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! 
+ type: string + default: "dev" + s3_subdir: + description: S3 subdirectory, not including the GPU-family + type: string + default: "v2" + s3_staging_subdir: + description: "Staging subdirectory to push the wheels for test" + type: string + default: "v2-staging" + cloudfront_url: + description: CloudFront URL pointing to Python index + type: string + default: "https://rocm.devreleases.amd.com/v2" + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + type: string + default: "https://rocm.devreleases.amd.com/v2-staging" + rocm_version: + description: ROCm version to pip install (e.g. "7.10.0a20251124") + type: string + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." + type: string + default: '' + +permissions: + id-token: write + contents: read + +run-name: Release portable Linux PyTorch Wheels (${{ inputs.amdgpu_family }}, ${{ inputs.release_type }}, ${{ inputs.rocm_version }}) + +jobs: + release: + name: Release | ${{ inputs.amdgpu_family }} | py ${{ matrix.python_version }} | torch ${{ matrix.pytorch_git_ref }} + strategy: + fail-fast: false + matrix: + python_version: ["3.11", "3.12", "3.13"] + pytorch_git_ref: ["release/2.7", "release/2.8", "release/2.9", "nightly"] + include: + - pytorch_git_ref: release/2.7 + pytorch_patchset: rocm_2.7 + - pytorch_git_ref: release/2.8 + pytorch_patchset: rocm_2.8 + - pytorch_git_ref: release/2.9 + pytorch_patchset: rocm_2.9 + - pytorch_git_ref: nightly + pytorch_patchset: nightly + + uses: ./.github/workflows/build_portable_linux_pytorch_wheels.yml + with: + amdgpu_family: ${{ inputs.amdgpu_family }} + python_version: ${{ matrix.python_version }} + release_type: ${{ inputs.release_type }} + s3_subdir: ${{ inputs.s3_subdir }} + s3_staging_subdir: ${{ inputs.s3_staging_subdir }} + cloudfront_url: ${{ inputs.cloudfront_url }} + cloudfront_staging_url: ${{ inputs.cloudfront_staging_url }} + rocm_version: ${{ 
inputs.rocm_version }} + pytorch_git_ref: ${{ matrix.pytorch_git_ref }} + pytorch_patchset: ${{ matrix.pytorch_patchset }} + ref: ${{ inputs.ref || '' }} diff --git a/.github/workflows/release_windows_packages.yml b/.github/workflows/release_windows_packages.yml new file mode 100644 index 0000000000000..4c456b4d6489d --- /dev/null +++ b/.github/workflows/release_windows_packages.yml @@ -0,0 +1,360 @@ +name: Release Windows packages + +on: + # Trigger from another workflow (typically to build dev packages and then test them) + workflow_call: + inputs: + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + type: string + default: "dev" + package_suffix: + type: string + s3_subdir: + description: "Subdirectory to push the Python packages" + type: string + default: "v2" + s3_staging_subdir: + description: "Staging subdirectory to push the packages" + type: string + default: "v2-staging" + families: + description: "Comma separated list of AMD GPU families, e.g. `gfx94X,gfx103x`, or empty for the default list" + type: string + prerelease_version: + description: "(Optional) Number of the prerelease" + type: string + repository: + description: "Repository to checkout. Otherwise, defaults to `github.repository`." + type: string + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." + type: string + # Trigger manually (typically to test the workflow or manually build a release [candidate]) + workflow_dispatch: + inputs: + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! 
+ type: string + default: "dev" + package_suffix: + type: string + s3_subdir: + description: "Subdirectory to push the Python packages" + type: string + default: "v2" + s3_staging_subdir: + description: "Staging subdirectory to push the packages" + type: string + default: "v2-staging" + families: + description: "A comma separated list of AMD GPU families, e.g. `gfx94X,gfx103x`, or empty for the default list" + type: string + prerelease_version: + description: "(Optional) Number of the prerelease" + type: string + extra_cmake_options: + description: "Extra options to pass to the CMake configure command" + type: string + + # Trigger on a schedule to build nightly release candidates. + schedule: + # Runs at 04:00 AM UTC, which is 8:00 PM PST (UTC-8) + - cron: '0 04 * * *' + +permissions: + contents: read + +run-name: Release Windows packages (${{ inputs.families || 'default' }}, ${{ inputs.release_type || 'nightly' }}) + +jobs: + setup_metadata: + if: ${{ github.repository_owner == 'ROCm' || github.event_name != 'schedule' }} + runs-on: ubuntu-24.04 + env: + release_type: ${{ inputs.release_type || 'nightly' }} + outputs: + version: ${{ steps.rocm_package_version.outputs.rocm_package_version }} + release_type: ${{ env.release_type }} + package_targets: ${{ steps.configure.outputs.package_targets }} + cloudfront_url: ${{ steps.release_information.outputs.cloudfront_url }} + cloudfront_staging_url: ${{ steps.release_information.outputs.cloudfront_staging_url }} + s3_subdir_tar: ${{ steps.release_information.outputs.s3_subdir_tar }} + steps: + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Setup Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: 3.12 + + - name: Compute package version + id: rocm_package_version + run: | + python 
./build_tools/compute_rocm_package_version.py \ + --release-type=${{ env.release_type }} \ + --prerelease-version=${{ inputs.prerelease_version }} + + - name: Set variables for nightly release + if: ${{ env.release_type == 'nightly' }} + run: | + echo "tmp_cloudfront_url=https://rocm.nightlies.amd.com/v2" >> $GITHUB_ENV + echo "tmp_cloudfront_staging_url=https://rocm.nightlies.amd.com/v2-staging" >> $GITHUB_ENV + echo "tmp_s3_subdir_tar=''" >> $GITHUB_ENV + + - name: Set variables for prerelease + if: ${{ env.release_type == 'prerelease' }} + run: | + echo "tmp_cloudfront_url=https://rocm.prereleases.amd.com/whl" >> $GITHUB_ENV + echo "tmp_cloudfront_staging_url=https://rocm.prereleases.amd.com/whl-staging" >> $GITHUB_ENV + echo "tmp_s3_subdir_tar=v3/tarball/" >> $GITHUB_ENV + + - name: Set variables for development release + if: ${{ env.release_type == 'dev' }} + run: | + echo "tmp_cloudfront_url=https://rocm.devreleases.amd.com/v2" >> $GITHUB_ENV + echo "tmp_cloudfront_staging_url=https://rocm.devreleases.amd.com/v2-staging" >> $GITHUB_ENV + echo "tmp_s3_subdir_tar=''" >> $GITHUB_ENV + + - name: Generate release information + id: release_information + run: | + echo "cloudfront_url=${tmp_cloudfront_url}" >> $GITHUB_OUTPUT + echo "cloudfront_staging_url=${tmp_cloudfront_staging_url}" >> $GITHUB_OUTPUT + echo "s3_subdir_tar=${tmp_s3_subdir_tar}" >> $GITHUB_OUTPUT + + - name: Generating package target matrix + id: configure + env: + AMDGPU_FAMILIES: ${{ inputs.families }} + THEROCK_PACKAGE_PLATFORM: "windows" + # Variable comes from ROCm organization variable 'ROCM_THEROCK_TEST_RUNNERS' + ROCM_THEROCK_TEST_RUNNERS: ${{ vars.ROCM_THEROCK_TEST_RUNNERS }} + LOAD_TEST_RUNNERS_FROM_VAR: false + run: python ./build_tools/github_actions/fetch_package_targets.py + + windows_packages: + name: ${{ matrix.target_bundle.amdgpu_family }}::Build Windows + runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-windows-scale-rocm' || 'windows-2022' }} + continue-on-error: ${{ 
matrix.target_bundle.expect_failure == true }} # for GPU families that are flaky, we mark as xfail + timeout-minutes: 720 # 12 hour timeout + needs: [setup_metadata] + permissions: + contents: write + actions: write # Added permission to trigger workflows + id-token: write # Added permission for AWS S3 upload + defaults: + run: + shell: bash + strategy: + fail-fast: false + matrix: + target_bundle: ${{ fromJSON(needs.setup_metadata.outputs.package_targets) }} + env: + TEATIME_LABEL_GH_GROUP: 1 + BUILD_DIR: B:\build + CACHE_DIR: "${{github.workspace}}/.cache" + CCACHE_DIR: "${{github.workspace}}/.cache/ccache" + CCACHE_MAXSIZE: "4000M" + DIST_ARCHIVE: "B:/build/artifacts/therock-dist-windows-${{ matrix.target_bundle.amdgpu_family }}${{ inputs.package_suffix }}-${{ needs.setup_metadata.outputs.version }}.tar.gz" + RELEASE_TYPE: "${{ needs.setup_metadata.outputs.release_type }}" + S3_BUCKET_TAR: "therock-${{ needs.setup_metadata.outputs.release_type }}-tarball" + S3_SUBDIR_TAR: ${{ needs.setup_metadata.outputs.s3_subdir_tar }} + S3_BUCKET_PY: "therock-${{ needs.setup_metadata.outputs.release_type }}-python" + S3_SUBDIR: ${{ inputs.s3_subdir || 'v2' }} + S3_STAGING_SUBDIR: ${{ inputs.s3_staging_subdir || 'v2-staging' }} + + steps: + - name: "Checking out repository" + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: 3.12 + + - name: Install python deps + run: | + pip install -r requirements.txt + + # TODO(amd-justchen): share with build_windows_artifacts.yml. Include in VM image? Dockerfile? 
+ - name: Install requirements + run: | + choco install --no-progress -y ccache + # ninja pinned due to a bug in the 1.13.0 release: + # https://github.com/ninja-build/ninja/issues/2616 + choco install --no-progress -y ninja --version 1.12.1 + choco install --no-progress -y strawberryperl + echo "$PATH;C:\Strawberry\c\bin" >> $GITHUB_PATH + choco install --no-progress -y awscli + choco install --no-progress -y pkgconfiglite + echo "$PATH;C:\Program Files\Amazon\AWSCLIV2" >> $GITHUB_PATH + + - uses: iterative/setup-dvc@4bdfd2b0f6f1ad7e08afadb03b1a895c352a5239 # v2.0.0 + with: + version: '3.62.0' + + # After other installs, so MSVC get priority in the PATH. + - name: Configure MSVC + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + + - name: Runner health status + run: | + ccache --zero-stats + python ./build_tools/health_status.py + + # TODO: We shouldn't be using a cache on actual release branches, but it + # really helps for iteration time. + - name: Enable cache + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: ${{ env.CACHE_DIR }} + key: windows-package-matrix-v1-${{ matrix.target_bundle.amdgpu_family }}-${{ github.sha }} + restore-keys: | + windows-package-matrix-v1-${{ matrix.target_bundle.amdgpu_family }}- + + - name: Fetch sources + timeout-minutes: 30 + run: | + git config fetch.parallel 10 + git config --global core.symlinks true + git config --global core.longpaths true + python ./build_tools/fetch_sources.py --jobs 12 + + - name: Configure Projects + env: + amdgpu_families: ${{ matrix.target_bundle.amdgpu_family }} + package_version: "ADHOCBUILD" + extra_cmake_options: ${{ inputs.extra_cmake_options }} + run: | + # clear cache before build and after download + ccache -z + + python3 build_tools/github_actions/build_configure.py + + - name: Build therock-dist + run: cmake --build "${{ env.BUILD_DIR }}" --target therock-dist + + - name: Build therock-archives + run: cmake --build 
"${{ env.BUILD_DIR }}" --target therock-archives + + - name: Compress dist folder + run: | + cd ${{ env.BUILD_DIR }}/dist/rocm + echo "Compressing ${{ env.DIST_ARCHIVE }}" + tar cfz "${{ env.DIST_ARCHIVE }}" --force-local . + + - name: Build Python Packages + run: | + python ./build_tools/build_python_packages.py \ + --artifact-dir=${{ env.BUILD_DIR }}/artifacts \ + --dest-dir=${{ env.BUILD_DIR }}/packages \ + --version=${{ needs.setup_metadata.outputs.version }} + + - name: Build report + if: ${{ !cancelled() }} + shell: bash + run: | + if [ -d "${{ env.BUILD_DIR }}" ]; then + echo "Build dir:" + echo "------------" + ls -lh "${{ env.BUILD_DIR }}" + echo "CCache Stats:" + echo "-------------" + ccache -s + else + echo "[ERROR] Build directory ${{ env.BUILD_DIR }} does not exist. Skipping report!" + echo " This should only happen if the CI is cancelled before the build step." + exit 1 # Stop the CI as build did not happen + fi + + - name: Configure AWS Credentials + if: ${{ github.repository_owner == 'ROCm' && !cancelled() }} + uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 + with: + aws-region: us-east-2 + role-to-assume: arn:aws:iam::692859939525:role/therock-${{ env.RELEASE_TYPE }} + special-characters-workaround: true + + - name: Post Build Upload + if: ${{ github.repository_owner == 'ROCm' && !cancelled() }} + run: | + python3 build_tools/github_actions/post_build_upload.py \ + --run-id ${{ github.run_id }} \ + --artifact-group "${{ matrix.target_bundle.amdgpu_family }}" \ + --build-dir ${{ env.BUILD_DIR }} \ + --upload \ + --job-status ${{ job.status }} + + - name: Upload Releases to staging S3 + if: ${{ github.repository_owner == 'ROCm' }} + run: | + aws s3 cp ${{ env.BUILD_DIR }}/packages/dist/ s3://${{ env.S3_BUCKET_PY }}/${{ env.S3_STAGING_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}/ \ + --recursive --no-follow-symlinks \ + --exclude "*" \ + --include "*.whl" \ + --include "*.tar.gz" + + - name: 
(Re-)Generate Python package release index for staging + if: ${{ github.repository_owner == 'ROCm' }} + env: + # Environment variable to be set for `manage.py` + CUSTOM_PREFIX: "${{ env.S3_STAGING_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}" + run: | + pip install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${{ env.CUSTOM_PREFIX }} + + ## TODO: Restrict uploading to the non-staging S3 directory until sanity checks and all validation tests have successfully passed. + - name: Upload Releases to S3 + if: ${{ github.repository_owner == 'ROCm' }} + run: | + aws s3 cp ${{ env.DIST_ARCHIVE }} s3://${{ env.S3_BUCKET_TAR }}/${{ env.S3_SUBDIR_TAR }} + aws s3 cp ${{ env.BUILD_DIR }}/packages/dist/ s3://${{ env.S3_BUCKET_PY }}/${{ env.S3_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}/ \ + --recursive --no-follow-symlinks \ + --exclude "*" \ + --include "*.whl" \ + --include "*.tar.gz" + + # TODO(marbre): guard against race conditions where multiple workflows update the index at the same time? 
+ # Moving the index computation server-side could help + - name: (Re-)Generate release index pages + if: ${{ github.repository_owner == 'ROCm' }} + env: + # Environment variable to be set for `manage.py` + CUSTOM_PREFIX: "${{ env.S3_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}" + run: | + pip install boto3 packaging + python ./build_tools/third_party/s3_management/manage.py ${{ env.CUSTOM_PREFIX }} + python ./build_tools/index_generation_s3_tar.py \ + --bucket ${{ env.S3_BUCKET_TAR }} \ + --directory ${{ env.S3_SUBDIR_TAR }} \ + --upload + + - name: Trigger building PyTorch wheels + if: ${{ github.repository_owner == 'ROCm' && matrix.target_bundle.expect_pytorch_failure == false }} + uses: benc-uk/workflow-dispatch@e2e5e9a103e331dad343f381a29e654aea3cf8fc # v1.2.4 + with: + workflow: release_windows_pytorch_wheels.yml + inputs: | + { "amdgpu_family": "${{ matrix.target_bundle.amdgpu_family }}", + "release_type": "${{ env.RELEASE_TYPE }}", + "s3_subdir": "${{ env.S3_SUBDIR }}", + "s3_staging_subdir": "${{ env.S3_STAGING_SUBDIR }}", + "cloudfront_url": "${{ needs.setup_metadata.outputs.cloudfront_url }}", + "cloudfront_staging_url": "${{ needs.setup_metadata.outputs.cloudfront_staging_url }}", + "rocm_version": "${{ needs.setup_metadata.outputs.version }}", + "ref": "${{ inputs.ref || '' }}" + } + + - name: Save cache + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + if: ${{ !cancelled() }} + with: + path: ${{ env.CACHE_DIR }} + key: windows-package-matrix-v1-${{ matrix.target_bundle.amdgpu_family }}-${{ github.sha }} diff --git a/.github/workflows/release_windows_pytorch_wheels.yml b/.github/workflows/release_windows_pytorch_wheels.yml new file mode 100644 index 0000000000000..85e0f6b88da81 --- /dev/null +++ b/.github/workflows/release_windows_pytorch_wheels.yml @@ -0,0 +1,110 @@ +name: Release Windows PyTorch Wheels + +on: + workflow_call: + inputs: + amdgpu_family: + required: true + type: string + release_type: + description: 
The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + type: string + default: "dev" + s3_subdir: + description: S3 subdirectory, not including the GPU-family + type: string + default: "v2" + s3_staging_subdir: + description: Staging subdirectory to push the wheels for test + type: string + default: "v2-staging" + cloudfront_url: + description: CloudFront URL pointing to Python index + type: string + default: "https://rocm.devreleases.amd.com/v2" + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + required: true + type: string + rocm_version: + description: ROCm version to pip install (e.g. "7.10.0a20251124") + type: string + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." + type: string + workflow_dispatch: + inputs: + amdgpu_family: + type: choice + options: + - gfx101X-dgpu + - gfx103X-dgpu + - gfx110X-all + - gfx1150 + - gfx1151 + - gfx120X-all + - gfx90X-dcgpu + - gfx94X-dcgpu + - gfx950-dcgpu + default: gfx1151 + release_type: + description: The type of release to build ("dev", "nightly", or "prerelease"). All developer-triggered jobs should use "dev"! + type: string + default: "dev" + s3_subdir: + description: S3 subdirectory, not including the GPU-family + type: string + default: "v2" + s3_staging_subdir: + description: "Staging subdirectory to push the wheels for test" + type: string + default: "v2-staging" + cloudfront_url: + description: CloudFront URL pointing to Python index + type: string + default: "https://rocm.devreleases.amd.com/v2" + cloudfront_staging_url: + description: CloudFront base URL pointing to staging Python index + type: string + default: "https://rocm.devreleases.amd.com/v2-staging" + rocm_version: + description: ROCm version to pip install (e.g. "7.10.0a20251124") + type: string + ref: + description: "Branch, tag or SHA to checkout. 
Defaults to the reference or SHA that triggered the workflow." + type: string + default: '' + +permissions: + id-token: write + contents: read + +run-name: Release Windows PyTorch Wheels (${{ inputs.amdgpu_family }}, ${{ inputs.release_type }}, ${{ inputs.rocm_version }}) + +jobs: + release: + name: Release | ${{ inputs.amdgpu_family }} | py ${{ matrix.python_version }} | torch ${{ matrix.pytorch_git_ref }} + strategy: + fail-fast: false + matrix: + python_version: ["3.11", "3.12", "3.13"] + pytorch_git_ref: ["release/2.9", "nightly"] + include: + - pytorch_git_ref: release/2.9 + pytorch_patchset: rocm_2.9 + - pytorch_git_ref: nightly + pytorch_patchset: nightly + + uses: ./.github/workflows/build_windows_pytorch_wheels.yml + with: + amdgpu_family: ${{ inputs.amdgpu_family }} + python_version: ${{ matrix.python_version }} + release_type: ${{ inputs.release_type }} + s3_subdir: ${{ inputs.s3_subdir }} + s3_staging_subdir: ${{ inputs.s3_staging_subdir }} + cloudfront_url: ${{ inputs.cloudfront_url }} + cloudfront_staging_url: ${{ inputs.cloudfront_staging_url }} + rocm_version: ${{ inputs.rocm_version }} + pytorch_git_ref: ${{ matrix.pytorch_git_ref }} + pytorch_patchset: ${{ matrix.pytorch_patchset }} + ref: ${{ inputs.ref || '' }} diff --git a/.github/workflows/setup.yml b/.github/workflows/setup.yml new file mode 100644 index 0000000000000..c0af83e89731d --- /dev/null +++ b/.github/workflows/setup.yml @@ -0,0 +1,93 @@ +name: Setup + +on: + workflow_call: + inputs: + build_variant: + type: string + default: "release" + multi_arch: + type: boolean + default: false + description: "If true, group all families into one entry per build_variant instead of expanding cross-product" + outputs: + enable_build_jobs: + description: Whether to enable build jobs. 
+ value: ${{ jobs.setup.outputs.enable_build_jobs }} + linux_variants: + description: Matrix variants to run on Linux + value: ${{ jobs.setup.outputs.linux_variants }} + linux_test_labels: + description: ROCm projects to run Linux tests on. Optional filter. + value: ${{ jobs.setup.outputs.linux_test_labels }} + windows_variants: + description: Matrix variants to run on Windows. + value: ${{ jobs.setup.outputs.windows_variants }} + test_type: + description: The test type to run for component tests (i.e. smoke, full) + value: ${{ jobs.setup.outputs.test_type }} + windows_test_labels: + description: ROCm projects to run Windows tests on. Optional filter. + value: ${{ jobs.setup.outputs.windows_test_labels }} + rocm_package_version: + description: ROCm package version (primarily for Python packages). + value: ${{ jobs.setup.outputs.rocm_package_version }} + +permissions: + contents: read + +jobs: + setup: + runs-on: ubuntu-24.04 + env: + # The commit being checked out is the merge commit for a PR. Its first + # parent will be the tip of the base branch. 
+ BASE_REF: HEAD^ + outputs: + enable_build_jobs: ${{ steps.configure.outputs.enable_build_jobs }} + linux_variants: ${{ steps.configure.outputs.linux_variants }} + linux_test_labels: ${{ steps.configure.outputs.linux_test_labels }} + windows_variants: ${{ steps.configure.outputs.windows_variants }} + test_type: ${{ steps.configure.outputs.test_type }} + windows_test_labels: ${{ steps.configure.outputs.windows_test_labels }} + rocm_package_version: ${{ steps.rocm_package_version.outputs.rocm_package_version }} + steps: + - name: Checkout TheRock repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + repository: "ROCm/TheRock" + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + fetch-depth: 10 + - name: SHA of TheRock + run: | + git rev-parse HEAD + git log -1 + - name: Set PR_LABELS variable with labels assigned to pull request + if: ${{ github.event.pull_request }} # only set PR labels var if this is a pull request + env: + GITHUB_TOKEN: ${{ github.token }} + PR_NUMBER: ${{ github.event.number }} + run: | + echo "PR_LABELS=$(gh pr view ${PR_NUMBER} --repo ROCm/llvm-project --json labels)" >> $GITHUB_ENV + + - name: Configuring CI options + id: configure + env: + #INPUT_LINUX_AMDGPU_FAMILIES: ${{ github.event.inputs.linux_amdgpu_families }} + INPUT_LINUX_AMDGPU_FAMILIES: "gfx94X" + LINUX_TEST_LABELS: ${{ github.event.inputs.linux_test_labels }} + LINUX_USE_PREBUILT_ARTIFACTS: ${{ github.event.inputs.linux_use_prebuilt_artifacts }} + #INPUT_WINDOWS_AMDGPU_FAMILIES: ${{ github.event.inputs.windows_amdgpu_families }} + INPUT_WINDOWS_AMDGPU_FAMILIES: "gfx1151" + WINDOWS_TEST_LABELS: ${{ github.event.inputs.windows_test_labels }} + WINDOWS_USE_PREBUILT_ARTIFACTS: ${{ github.event.inputs.windows_use_prebuilt_artifacts }} + BUILD_VARIANT: ${{ inputs.build_variant }} + MULTI_ARCH: ${{ inputs.multi_arch }} + # Variable comes from ROCm organization variable 'ROCM_THEROCK_TEST_RUNNERS' + ROCM_THEROCK_TEST_RUNNERS: ${{ 
vars.ROCM_THEROCK_TEST_RUNNERS }} + LOAD_TEST_RUNNERS_FROM_VAR: false + run: ./build_tools/github_actions/configure_ci.py + + - name: Compute package version + id: rocm_package_version + run: python ./build_tools/compute_rocm_package_version.py --release-type=dev diff --git a/.github/workflows/test_artifacts.yml b/.github/workflows/test_artifacts.yml new file mode 100644 index 0000000000000..53a1e2442571d --- /dev/null +++ b/.github/workflows/test_artifacts.yml @@ -0,0 +1,122 @@ +name: Test Artifacts + +on: + workflow_dispatch: + inputs: + artifact_group: + type: string + artifact_run_id: + type: string + default: "" + amdgpu_families: + type: string + test_runs_on: + type: string + sanity_check_only_for_family: + type: boolean + default: false + test_type: + type: string + test_labels: + type: string + workflow_call: + inputs: + artifact_group: + type: string + artifact_run_id: + type: string + default: "" + amdgpu_families: + type: string + test_runs_on: + type: string + sanity_check_only_for_family: + type: boolean + default: false + test_type: + type: string + test_labels: + type: string + push: + branches: + - ADHOCBUILD + +permissions: + contents: read + +jobs: + configure_test_matrix: + name: "Configure test matrix" + # if there is a test machine available + if: ${{ inputs.test_runs_on != '' }} + runs-on: ${{ inputs.test_runs_on }} + outputs: + components: ${{ steps.configure.outputs.components }} + platform: ${{ steps.configure.outputs.platform }} + shard_arr: ${{ steps.configure.outputs.shard_arr }} + steps: + - name: "Fetch 'build_tools' from repository" + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: "ROCm/TheRock" + sparse-checkout: build_tools + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + path: "prejob" + + # Checkout failure is possible on Windows, as it's the first job on a GPU test runner. 
+ # Post-job cleanup isn't necessary since no executables are launched in this job. + - name: Pre-job cleanup processes on Windows + if: ${{ runner.os == 'Windows' }} + shell: powershell + run: . '${{ github.workspace }}\prejob\build_tools\github_actions\cleanup_processes.ps1' + + - name: "Checking out repository" + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: "ROCm/TheRock" + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + + + - name: Setting up Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: 3.12 + + - name: "Configuring CI options" + id: configure + env: + ARTIFACT_GROUP: ${{ inputs.artifact_group }} + AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }} + TEST_TYPE: ${{ inputs.test_type }} + TEST_LABELS: ${{ inputs.test_labels }} + run: python ./build_tools/github_actions/fetch_test_configurations.py + + test_sanity_check: + name: 'Test Sanity Check' + needs: configure_test_matrix + uses: './.github/workflows/test_sanity_check.yml' + with: + artifact_group: ${{ inputs.artifact_group }} + artifact_run_id: ${{ inputs.artifact_run_id }} + amdgpu_families: ${{ inputs.amdgpu_families }} + test_runs_on: ${{ inputs.test_runs_on }} + platform: ${{ needs.configure_test_matrix.outputs.platform }} + + test_components: + name: 'Test ${{ matrix.components.job_name }}' + needs: [test_sanity_check, configure_test_matrix] + # skip tests if no test matrix to run and sanity check only requested + if: ${{ needs.configure_test_matrix.outputs.components != '[]' && !inputs.sanity_check_only_for_family }} + strategy: + fail-fast: false + matrix: + components: ${{ fromJSON(needs.configure_test_matrix.outputs.components) }} + uses: './.github/workflows/test_component.yml' + with: + artifact_run_id: ${{ inputs.artifact_run_id }} + artifact_group: ${{ inputs.artifact_group }} + amdgpu_families: ${{ inputs.amdgpu_families }} + test_runs_on: ${{ inputs.test_runs_on }} + platform: ${{ 
needs.configure_test_matrix.outputs.platform }} + component: ${{ toJSON(matrix.components) }} diff --git a/.github/workflows/test_component.yml b/.github/workflows/test_component.yml new file mode 100644 index 0000000000000..7475e96436e9d --- /dev/null +++ b/.github/workflows/test_component.yml @@ -0,0 +1,110 @@ +name: Test component + +on: + workflow_call: + inputs: + artifact_run_id: + type: string + default: "" + artifact_group: + type: string + amdgpu_families: + type: string + test_runs_on: + type: string + platform: + type: string + component: + type: string + +permissions: + contents: read + +jobs: + test_component: + name: 'Test ${{ fromJSON(inputs.component).job_name }} (shard ${{ matrix.shard }} of ${{ fromJSON(inputs.component).total_shards }})' + runs-on: ${{ inputs.test_runs_on }} + timeout-minutes: 210 + container: + image: ${{ inputs.platform == 'linux' && 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:4150afe4759d14822f0e3f8930e1124f26e11f68b5c7b91ec9a02b20b1ebbb98' || null }} + options: --ipc host + --group-add video + --device /dev/kfd + --device /dev/dri + --group-add 110 + --env-file /etc/podinfo/gha-gpu-isolation-settings + --user 0:0 # Running as root, by recommendation of GitHub: https://docs.github.com/en/actions/reference/workflows-and-actions/dockerfile-support#user + strategy: + fail-fast: false + matrix: + # The shard array is based on "total_shards" from "fetch_test_configurations.py" + # The test executable will shard based on the array. 
(ex: [1, 2, 3, 4] = four test shards) + shard: ${{ fromJSON(inputs.component).shard_arr }} + defaults: + run: + shell: bash + env: + VENV_DIR: ${{ github.workspace }}/.venv + ARTIFACT_RUN_ID: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}" + OUTPUT_ARTIFACTS_DIR: "./build" + THEROCK_BIN_DIR: "./build/bin" + AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }} + steps: + - name: "Fetch 'build_tools' from repository" + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: "ROCm/TheRock" + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + sparse-checkout: build_tools + path: "prejob" + + - name: Pre-job cleanup processes on Windows + if: ${{ runner.os == 'Windows' }} + shell: powershell + run: . '${{ github.workspace }}\prejob\build_tools\github_actions\cleanup_processes.ps1' + + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: "ROCm/TheRock" + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + + - name: Run setup test environment workflow + uses: './.github/actions/setup_test_environment' + with: + ARTIFACT_RUN_ID: ${{ env.ARTIFACT_RUN_ID }} + ARTIFACT_GROUP: ${{ inputs.artifact_group }} + OUTPUT_ARTIFACTS_DIR: ${{ env.OUTPUT_ARTIFACTS_DIR }} + VENV_DIR: ${{ env.VENV_DIR }} + FETCH_ARTIFACT_ARGS: ${{ fromJSON(inputs.component).fetch_artifact_args }} + IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }} + + # safe.directory must be set before Runner Health Status + - name: Adjust git config + run: | + git config --global --add safe.directory $PWD + git config fetch.parallel 10 + + - name: Runner health status + run: | + python ./build_tools/health_status.py + + - name: Driver / GPU sanity check + run: | + python ./build_tools/print_driver_gpu_info.py + + - name: Test + timeout-minutes: ${{ fromJSON(inputs.component).timeout_minutes }} + env: + SHARD_INDEX: ${{ matrix.shard }} + TOTAL_SHARDS: ${{ 
fromJSON(inputs.component).total_shards }} + TEST_TYPE: ${{ fromJSON(inputs.component).test_type }} + run: | + ${{ fromJSON(inputs.component).test_script }} + + # GitHub's 'Complete job' step is unaware of launched executables + # and will fail to clean up orphan processes. + - name: Post-job cleanup processes on Windows + if: ${{ always() && runner.os == 'Windows' }} + shell: powershell + run: . '${{ github.workspace }}\build_tools\github_actions\cleanup_processes.ps1' diff --git a/.github/workflows/test_jax_dockerfile.yml b/.github/workflows/test_jax_dockerfile.yml new file mode 100644 index 0000000000000..a577dbe5e4ef0 --- /dev/null +++ b/.github/workflows/test_jax_dockerfile.yml @@ -0,0 +1,54 @@ +name: Test JAX Wheels + +on: + workflow_dispatch: + inputs: + test_runs_on: + required: true + type: string + default: "linux-mi325-1gpu-ossci-rocm-frac" + image_name: + required: true + description: JAX docker image to run tests with + type: string + jax_version: + description: Version of JAX to install + required: false + type: string + jax_plugin_branch: + required: true + description: JAX plugin branch to checkout + type: string + default: "rocm-jaxlib-v0.6.0" + + workflow_call: + inputs: + test_runs_on: + required: true + type: string + image_name: + required: true + description: JAX docker image to run tests with + type: string + jax_version: + description: Version of JAX to install instead of the one on the docker image + required: false + type: string + jax_plugin_branch: + description: JAX plugin branch to checkout to use for test scripts + type: string + default: "rocm-jaxlib-v0.8.0" + +permissions: + contents: read + +jobs: + test_wheels: + name: Test + runs-on: ${{ inputs.test_runs_on }} + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: rocm/rocm-jax + # TODO: Add steps for creating the JAX docker image with an install of TheRock and then running JAX tests on the container diff --git 
a/.github/workflows/test_linux_jax_wheels.yml b/.github/workflows/test_linux_jax_wheels.yml new file mode 100644 index 0000000000000..00823960f1b0d --- /dev/null +++ b/.github/workflows/test_linux_jax_wheels.yml @@ -0,0 +1,203 @@ +name: Test Linux JAX Wheels + +on: + workflow_call: + inputs: + amdgpu_family: + required: true + type: string + release_type: + required: true + type: string + s3_subdir: + required: true + type: string + package_index_url: + description: Base CloudFront URL for the Python package index + required: true + type: string + rocm_version: + description: ROCm version (optional, informational) + required: false + type: string + tar_url: + description: URL to TheRock tarball to configure ROCm + required: true + type: string + python_version: + description: Python version(s) to test (e.g., "3.12") + required: true + type: string + repository: + description: "Repository to checkout. Otherwise, defaults to `github.repository`." + type: string + jax_ref: + description: rocm-jax repository ref/branch to check out + required: false + type: string + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." 
+ type: string + test_runs_on: + required: true + type: string + + workflow_dispatch: + inputs: + amdgpu_family: + type: choice + options: + - gfx101X-dgpu + - gfx103X-dgpu + - gfx110X-all + - gfx1150 + - gfx1151 + - gfx120X-all + - gfx90X-dcgpu + - gfx94X-dcgpu + - gfx950-dcgpu + default: gfx94X-dcgpu + release_type: + description: The type of release ("nightly" or "dev") + required: true + type: string + default: dev + s3_subdir: + description: S3 subdirectory, not including the GPU-family + required: true + type: string + default: v2 + package_index_url: + description: Base CloudFront URL for the Python package index + required: true + type: string + default: https://rocm.nightlies.amd.com/v2-staging/ + rocm_version: + description: ROCm version + required: false + type: string + tar_url: + description: URL to TheRock tarball to configure ROCm + required: true + type: string + python_version: + description: Python version(s) to test (e.g., "3.12") + required: true + type: string + default: "3.12" + jax_ref: + description: rocm-jax repository ref/branch to check out + required: false + type: string + test_runs_on: + description: Runner label to use. The selected runner should have a GPU supported by amdgpu_family + required: true + type: string + default: "linux-mi325-1gpu-ossci-rocm-frac" + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." 
+ type: string + +permissions: + contents: read + packages: read + +jobs: + test_jax_wheels: + name: Test JAX Wheels | ${{ inputs.amdgpu_family }} + runs-on: ${{ inputs.test_runs_on }} + container: + image: ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:405945a40deaff9db90b9839c0f41d4cba4a383c1a7459b28627047bf6302a26 + options: >- + --device /dev/kfd + --device /dev/dri + --group-add render + --group-add video + --user root + --env-file /etc/podinfo/gha-gpu-isolation-settings + defaults: + run: + shell: bash + env: + VIRTUAL_ENV: ${{ github.workspace }}/.venv + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + THEROCK_TAR_URL: ${{ inputs.tar_url }} + PYTHON_VERSION: ${{ inputs.python_version }} + WHEEL_INDEX_URL: ${{ inputs.package_index_url }}/${{ inputs.amdgpu_family }} + + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Checkout rocm-jax (plugin + build scripts) + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + path: jax + repository: rocm/rocm-jax + ref: ${{ inputs.jax_ref }} + + - name: Checkout JAX extended tests repo + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: rocm/jax + ref: ${{ inputs.jax_ref }} + path: jax/jax_tests + + - name: Set up Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ inputs.python_version }} + check-latest: true + + - name: System deps, venv configure + run: | + python3 -m venv "${VIRTUAL_ENV}" + echo "PATH=${VIRTUAL_ENV}/bin:${PATH}" >> "$GITHUB_ENV" + python3 build_tools/setup_venv.py "${VIRTUAL_ENV}" --activate-in-future-github-actions-steps + + - name: Install base JAX test requirements + run: | + # This script sets up the venv and activates it across steps; keep it consistent + pip install -r 
external-builds/jax/requirements-jax.txt + + - name: Configure ROCm from TheRock tarball + env: + ROCM_VERSION: ${{ inputs.rocm_version }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + run: | + DEST="/opt/rocm-${{ inputs.rocm_version }}" + # Install directly from TheRock release buckets (nightly/dev) using the provided version + python build_tools/install_rocm_from_artifacts.py \ + --release "${{ inputs.rocm_version }}" \ + --artifact-group "${{ inputs.amdgpu_family }}" \ + --output-dir "${DEST}" + + - name: Extract JAX version and set to GITHUB_ENV + run: | + # Extract JAX version from requirements.txt (e.g., "jax==0.8.0") + # Remove all whitespace from requirements.txt to simplify parsing + # Search for lines starting with "jax==" or "jaxlib==" followed by version (excluding comments) + # Extract the version number by splitting on '=' and taking the 3rd field + # [^#]+ matches one or more characters that are NOT '#', ensuring we stop before any inline comments + JAX_VERSION=$(tr -d ' ' < jax/build/requirements.txt \ + | grep -E '^(jax|jaxlib)==[^#]+' | head -n1 | cut -d'=' -f3) + echo "JAX_VERSION=$JAX_VERSION" >> "$GITHUB_ENV" + + - name: Install JAX wheels from package index + run: | + # Install jaxlib/plugin/pjrt from the GPU-family index; install jax from PyPI to match the version + pip install --index-url "${{ env.WHEEL_INDEX_URL }}" \ + "jaxlib==${JAX_VERSION}+rocm${{ inputs.rocm_version }}" \ + "jax-rocm7-plugin==${JAX_VERSION}+rocm${{ inputs.rocm_version }}" \ + "jax-rocm7-pjrt==${JAX_VERSION}+rocm${{ inputs.rocm_version }}" + pip install --extra-index-url https://pypi.org/simple "jax==${JAX_VERSION}" + + - name: Run JAX tests + run: | + pytest jax/jax_tests/tests/multi_device_test.py -q --log-cli-level=INFO + pytest jax/jax_tests/tests/core_test.py -q --log-cli-level=INFO + pytest jax/jax_tests/tests/util_test.py -q --log-cli-level=INFO + pytest jax/jax_tests/tests/scipy_stats_test.py -q --log-cli-level=INFO diff --git 
a/.github/workflows/test_pytorch_wheels.yml b/.github/workflows/test_pytorch_wheels.yml new file mode 100644 index 0000000000000..93fe73a704412 --- /dev/null +++ b/.github/workflows/test_pytorch_wheels.yml @@ -0,0 +1,190 @@ +name: Test PyTorch Wheels + +on: + workflow_dispatch: + inputs: + amdgpu_family: + description: GPU family to test + required: true + type: string + default: "gfx94X-dcgpu" + test_runs_on: + description: Runner label to use. The selected runner should have a GPU supported by amdgpu_family + required: true + type: string + default: "linux-mi325-1gpu-ossci-rocm-frac" + package_index_url: + description: Base Python package index URL to test, typically nightly/dev URL with a "v2" or "v2-staging" subdir (without a GPU family subdir) + required: true + type: string + default: "https://rocm.nightlies.amd.com/v2" + python_version: + required: true + type: string + default: "3.12" + torch_version: + description: torch package version to install. (e.g. "2.7.1+rocm7.10.0a20251120") + required: true + type: string + pytorch_git_ref: + description: PyTorch ref to checkout test sources from. (e.g. "nightly", or "release/2.7") + type: string + default: "release/2.7" + + workflow_call: + inputs: + amdgpu_family: + required: true + type: string + test_runs_on: + required: true + type: string + package_index_url: + required: true + type: string + python_version: + required: true + type: string + torch_version: + required: true + type: string + pytorch_git_ref: + type: string + default: "release/2.7" + repository: + description: "Repository to checkout. Otherwise, defaults to `github.repository`." + type: string + ref: + description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow." 
+ type: string + +permissions: + contents: read + +run-name: Test PyTorch (${{ inputs.amdgpu_family }}, ${{ inputs.torch_version}}, ${{ inputs.test_runs_on }}) + +jobs: + test_wheels: + name: Test PyTorch | ${{ inputs.amdgpu_family }} + runs-on: ${{ inputs.test_runs_on }} + container: + image: ${{ contains(inputs.test_runs_on, 'linux') && 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:405945a40deaff9db90b9839c0f41d4cba4a383c1a7459b28627047bf6302a26' || null }} + options: --ipc host + --group-add video + --device /dev/kfd + --device /dev/dri + --group-add 110 + --env-file /etc/podinfo/gha-gpu-isolation-settings + --user 0:0 # Running as root, by recommendation of GitHub: https://docs.github.com/en/actions/reference/workflows-and-actions/dockerfile-support#user + defaults: + run: + shell: bash + env: + VENV_DIR: ${{ github.workspace }}/.venv + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || '' }} + + - name: Set up Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ inputs.python_version }} + + # TODO: also upload and reference test report together with this logging? + - name: Summarize workflow inputs + run: | + python build_tools/github_actions/summarize_test_pytorch_workflow.py \ + --torch-version=${{ inputs.torch_version }} \ + --pytorch-git-ref=${{ inputs.pytorch_git_ref }} \ + --index-url=${{ inputs.package_index_url }} \ + --index-subdir=${{ inputs.amdgpu_family }} + + - name: Set git options + run: | + git config --global core.longpaths true + + # Here we checkout the same version of PyTorch that wheels were built from + # so we have the right set of test source files. We _probably_ don't need + # to run HIPIFY or apply any patches, so we skip those steps to save time. 
+ - name: Checkout PyTorch Source Repos from nightly branch + if: ${{ (inputs.pytorch_git_ref == 'nightly') }} + run: | + python external-builds/pytorch/pytorch_torch_repo.py checkout \ + --gitrepo-origin https://github.com/pytorch/pytorch.git \ + --repo-hashtag nightly \ + --no-hipify --no-patch + + - name: Checkout PyTorch Source Repos from stable branch + if: ${{ (inputs.pytorch_git_ref != 'nightly') }} + run: | + python external-builds/pytorch/pytorch_torch_repo.py checkout \ + --gitrepo-origin https://github.com/ROCm/pytorch.git \ + --repo-hashtag ${{ inputs.pytorch_git_ref }} \ + --no-hipify --no-patch + + - name: Set up virtual environment + run: | + python build_tools/setup_venv.py ${VENV_DIR} \ + --packages torch==${{ inputs.torch_version }} \ + --index-url=${{ inputs.package_index_url }} \ + --index-subdir=${{ inputs.amdgpu_family }} \ + --activate-in-future-github-actions-steps + + - name: Install test requirements + run: | + python -m pip install -r external-builds/pytorch/requirements-test.txt + pip freeze + + - name: Run rocm-sdk sanity tests + run: | + rocm-sdk test + + - name: Run PyTorch smoketests + run: | + python ./external-builds/pytorch/run_pytorch_smoke_tests.py -- \ + --log-cli-level=INFO \ + -v + + - name: (Linux) Run PyTorch tests + if: ${{ runner.os == 'Linux' }} + run: | + python ./external-builds/pytorch/run_pytorch_tests.py -- \ + --continue-on-collection-errors \ + --import-mode=importlib \ + -v + + # Windows testing is a recent addition and is being enabled incrementally. + # See https://github.com/ROCm/TheRock/issues/2258. + # + # Many tests are failing on torch 2.10+ so we limit testing to 2.9. + # (Obviously that's not ideal, but we need to start somewhere) + # + # HACK: The test process does not terminate on its own gracefully, + # so we write to run_pytorch_tests_exit_code.txt and then kill the process. + # After killing the process we read the return code to signal it normally. 
+ # See https://github.com/ROCm/TheRock/issues/999. + - name: (Windows) Run PyTorch tests + if: ${{ runner.os == 'Windows' && contains(inputs.torch_version, '2.9') }} + continue-on-error: true + run: | + python ./external-builds/pytorch/run_pytorch_tests.py -- \ + --continue-on-collection-errors \ + --import-mode=importlib \ + -v + + - name: (Windows) Read and propagate exit code + if: ${{ runner.os == 'Windows' && contains(inputs.torch_version, '2.9') }} + run: | + if [ -f run_pytorch_tests_exit_code.txt ]; then + EXIT_CODE=$(cat run_pytorch_tests_exit_code.txt) + echo "Exit code from file: ${EXIT_CODE}" + exit ${EXIT_CODE} + else + echo "No run_pytorch_tests_exit_code.txt found" + exit 1 + fi diff --git a/.github/workflows/test_sanity_check.yml b/.github/workflows/test_sanity_check.yml new file mode 100644 index 0000000000000..830e6beae8b40 --- /dev/null +++ b/.github/workflows/test_sanity_check.yml @@ -0,0 +1,118 @@ +name: TheRock Sanity Check + +on: + workflow_dispatch: + inputs: + artifact_group: + type: string + artifact_run_id: + type: string + default: "" + amdgpu_families: + type: string + default: "" + test_runs_on: + type: string + platform: + type: string + workflow_call: + inputs: + artifact_group: + type: string + artifact_run_id: + type: string + default: "" + amdgpu_families: + type: string + default: "" + test_runs_on: + type: string + platform: + type: string + push: + branches: + - ADHOCBUILD + +permissions: + contents: read + +jobs: + test_sanity_check: + name: "Sanity ROCM Test" + runs-on: ${{ inputs.test_runs_on }} + container: + image: ${{ inputs.platform == 'linux' && 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:405945a40deaff9db90b9839c0f41d4cba4a383c1a7459b28627047bf6302a26' || null }} + options: --ipc host + --group-add video + --device /dev/kfd + --device /dev/dri + --group-add 110 + --env-file /etc/podinfo/gha-gpu-isolation-settings + --user 0:0 # Running as root, by recommendation of GitHub: 
https://docs.github.com/en/actions/reference/workflows-and-actions/dockerfile-support#user + defaults: + run: + shell: bash + env: + VENV_DIR: ${{ github.workspace }}/.venv + ARTIFACT_RUN_ID: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}" + OUTPUT_ARTIFACTS_DIR: ${{ github.workspace }}/build + THEROCK_BIN_DIR: ${{ github.workspace }}/build/bin + steps: + - name: "Fetch 'build_tools' from repository" + if: ${{ runner.os == 'Windows' }} + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + sparse-checkout: build_tools + path: prejob + + - name: Pre-job cleanup processes on Windows + if: ${{ runner.os == 'Windows' }} + shell: powershell + run: . '${{ github.workspace }}\prejob\build_tools\github_actions\cleanup_processes.ps1' + + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: "ROCm/TheRock" + ref: ${{ secrets.THEROCK_MAINLINE_REF }} + + - name: Pre-job cleanup Docker containers on Linux + if: ${{ runner.os == 'Linux' }} + shell: bash + run: | + # Remove any stopped containers + docker container prune -f || true + # Remove dangling networks + docker network prune -f || true + + - name: Run setup test environment workflow + uses: './.github/actions/setup_test_environment' + with: + ARTIFACT_GROUP: ${{ inputs.artifact_group }} + ARTIFACT_RUN_ID: ${{ env.ARTIFACT_RUN_ID }} + OUTPUT_ARTIFACTS_DIR: ${{ env.OUTPUT_ARTIFACTS_DIR }} + VENV_DIR: ${{ env.VENV_DIR }} + FETCH_ARTIFACT_ARGS: "--base-only" + IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }} + + - name: Set HIP_CLANG_PATH for windows + if: ${{ runner.os == 'Windows' }} + run: echo "HIP_CLANG_PATH=${OUTPUT_ARTIFACTS_DIR}\lib\llvm\bin" >> $GITHUB_ENV + + - name: Driver / GPU sanity check + run: | + python ./build_tools/print_driver_gpu_info.py + + - name: Run ROCm Sanity Tests + timeout-minutes: 5 + env: + # Enable verbose logging, see + # 
https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/debugging.html + AMD_LOG_LEVEL: 4 + run: | + pytest tests/ --log-cli-level=info --timeout=60 + + - name: Post-job cleanup processes on Windows + if: ${{ always() && runner.os == 'Windows' }} + shell: powershell + run: . '${{ github.workspace }}\build_tools\github_actions\cleanup_processes.ps1' diff --git a/.github/workflows/therock_test_harness.yml b/.github/workflows/therock_test_harness.yml new file mode 100644 index 0000000000000..1699af369a140 --- /dev/null +++ b/.github/workflows/therock_test_harness.yml @@ -0,0 +1,101 @@ +name: TheRock Test Harness + +on: + workflow_dispatch: + inputs: + families: + type: string + description: 'The AMD GPU family to test. ex: gfx94X, gfx120X' + default: 'gfx94X' + release_version: + type: string + description: 'TheRock release version. (ex: nightly-tarball (X.Y.ZrcYYYYMMDD) or dev-tarball (X.Y.Z.dev0+{hash}))' + default: '7.9.0rc20251008' + tests_to_run: + type: string + description: 'The list of tests to run with "or" expression. (ex: "hipcub or rocprim")' + default: 'hipcub or rocprim or rocrand or rocthrust' + +permissions: + contents: read + +concurrency: + # A PR number if a pull request and otherwise the commit hash. This cancels + # queued and in-progress runs for the same PR (presubmit) or commit + # (postsubmit). The workflow name is prepended to avoid conflicts between + # different workflows. 
+ group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + setup_metadata: + runs-on: ubuntu-24.04 + outputs: + package_targets: ${{ steps.configure.outputs.package_targets }} + steps: + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: "ROCm/TheRock" + + - name: Setup Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: 3.12 + + - name: Generating package target matrix + id: configure + env: + AMDGPU_FAMILIES: ${{ inputs.families }} + THEROCK_PACKAGE_PLATFORM: "linux" + TEST_HARNESS_TARGET_FETCH: true + # Variable comes from ROCm organization variable 'ROCM_THEROCK_TEST_RUNNERS' + ROCM_THEROCK_TEST_RUNNERS: ${{ vars.ROCM_THEROCK_TEST_RUNNERS }} + LOAD_TEST_RUNNERS_FROM_VAR: false + run: python ./build_tools/github_actions/fetch_package_targets.py + + + therock_test_harness_linux: + name: TheRock Tests Sharded Linux Nightly + needs: [setup_metadata] + runs-on: ${{ matrix.target_bundle.test_machine }} + container: + image: 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:4150afe4759d14822f0e3f8930e1124f26e11f68b5c7b91ec9a02b20b1ebbb98' + options: --ipc host + --group-add video + --device /dev/kfd + --device /dev/dri + --group-add 110 + --env-file /etc/podinfo/gha-gpu-isolation-settings + strategy: + fail-fast: false + matrix: + target_bundle: ${{ fromJSON(needs.setup_metadata.outputs.package_targets) }} + defaults: + run: + shell: bash + steps: + - name: Checkout Repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + repository: "ROCm/TheRock" + + - name: Setup Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: 3.12 + + - name: Install TheRock + env: + release_version: ${{ inputs.release_version }} + run: | + pip install -r requirements-test.txt + python3 
build_tools/install_rocm_from_artifacts.py --tests --amdgpu-family ${{ matrix.target_bundle.amdgpu_family }} --release ${{ env.release_version }} + + # TODO: add parallelism + - name: Running test harness + # TESTING + run: | + python3 -m pytest -s -v --tb=short --therock-path=./therock-build tests/harness/tests*.py -k "${{ inputs.tests_to_run }}" + +# TODO: Add windows tests diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp index 1e3ac2e31870f..8250247a0204a 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.cpp +++ b/clang/lib/CodeGen/ABIInfoImpl.cpp @@ -301,39 +301,6 @@ bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, return true; } -bool CodeGen::isEmptyFieldForLayout(const ASTContext &Context, - const FieldDecl *FD) { - if (FD->isZeroLengthBitField()) - return true; - - if (FD->isUnnamedBitField()) - return false; - - return isEmptyRecordForLayout(Context, FD->getType()); -} - -bool CodeGen::isEmptyRecordForLayout(const ASTContext &Context, QualType T) { - const auto *RD = T->getAsRecordDecl(); - if (!RD) - return false; - - // If this is a C++ record, check the bases first. 
- if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { - if (CXXRD->isDynamicClass()) - return false; - - for (const auto &I : CXXRD->bases()) - if (!isEmptyRecordForLayout(Context, I.getType())) - return false; - } - - for (const auto *I : RD->fields()) - if (!isEmptyFieldForLayout(Context, I)) - return false; - - return true; -} - const Type *CodeGen::isSingleElementStruct(QualType T, ASTContext &Context) { const auto *RD = T->getAsRecordDecl(); if (!RD) diff --git a/clang/lib/CodeGen/ABIInfoImpl.h b/clang/lib/CodeGen/ABIInfoImpl.h index d9d79c6a55ddb..f0276be8cb97f 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.h +++ b/clang/lib/CodeGen/ABIInfoImpl.h @@ -120,16 +120,6 @@ bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays, bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, bool AsIfNoUniqueAddr = false); -/// isEmptyFieldForLayout - Return true iff the field is "empty", that is, -/// either a zero-width bit-field or an \ref isEmptyRecordForLayout. -bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD); - -/// isEmptyRecordForLayout - Return true iff a structure contains only empty -/// base classes (per \ref isEmptyRecordForLayout) and fields (per -/// \ref isEmptyFieldForLayout). Note, C++ record fields are considered empty -/// if the [[no_unique_address]] attribute would have made them empty. -bool isEmptyRecordForLayout(const ASTContext &Context, QualType T); - /// isSingleElementStruct - Determine if a structure is a "single /// element struct", i.e. 
it has exactly one non-empty field or /// exactly one field which is itself a single element diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 62f5d2f789326..b292efea94861 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "ABIInfoImpl.h" #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGDebugInfo.h" @@ -927,7 +926,7 @@ namespace { } void addMemcpyableField(FieldDecl *F) { - if (isEmptyFieldForLayout(CGF.getContext(), F)) + if (F->isZeroSize(CGF.getContext())) return; if (!FirstField) addInitialField(F); @@ -1884,7 +1883,7 @@ namespace { const CXXDestructorDecl *DD) : Context(Context), EHStack(EHStack), DD(DD), StartIndex(std::nullopt) {} void PushCleanupForField(const FieldDecl *Field) { - if (isEmptyFieldForLayout(Context, Field)) + if (Field->isZeroSize(Context)) return; unsigned FieldIndex = Field->getFieldIndex(); if (FieldHasTrivialDestructorBody(Context, Field)) { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d80c2d20f3f19..7cd663f97a9ed 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5245,7 +5245,7 @@ static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base, /// The resulting address doesn't necessarily have the right type. 
static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, const FieldDecl *field, bool IsInBounds) { - if (isEmptyFieldForLayout(CGF.getContext(), field)) + if (field->isZeroSize(CGF.getContext())) return emitAddrOfZeroSizeField(CGF, base, field, IsInBounds); const RecordDecl *rec = field->getParent(); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 6407afc3d9447..9dc74d5b78ea9 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "ABIInfoImpl.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CGRecordLayout.h" @@ -758,7 +757,7 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { // Zero-sized fields are not emitted, but their initializers may still // prevent emission of this struct as a constant. - if (isEmptyFieldForLayout(CGM.getContext(), Field)) { + if (Field->isZeroSize(CGM.getContext())) { if (Init && Init->HasSideEffects(CGM.getContext())) return false; continue; @@ -893,8 +892,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, continue; // Don't emit anonymous bitfields or zero-sized fields. - if (Field->isUnnamedBitField() || - isEmptyFieldForLayout(CGM.getContext(), *Field)) + if (Field->isUnnamedBitField() || Field->isZeroSize(CGM.getContext())) continue; // Emit the value of the initializer. @@ -2642,10 +2640,8 @@ static llvm::Constant *EmitNullConstant(CodeGenModule &CGM, const auto *base = I.getType()->castAsCXXRecordDecl(); // Ignore empty bases. 
- if (isEmptyRecordForLayout(CGM.getContext(), I.getType()) || - CGM.getContext() - .getASTRecordLayout(base) - .getNonVirtualSize() + if (base->isEmpty() || + CGM.getContext().getASTRecordLayout(base).getNonVirtualSize() .isZero()) continue; @@ -2659,8 +2655,7 @@ static llvm::Constant *EmitNullConstant(CodeGenModule &CGM, for (const auto *Field : record->fields()) { // Fill in non-bitfields. (Bitfields always use a zero pattern, which we // will fill in later.) - if (!Field->isBitField() && - !isEmptyFieldForLayout(CGM.getContext(), Field)) { + if (!Field->isBitField() && !Field->isZeroSize(CGM.getContext())) { unsigned fieldIndex = layout.getLLVMFieldNo(Field); elements[fieldIndex] = CGM.EmitNullConstant(Field->getType()); } @@ -2680,7 +2675,7 @@ static llvm::Constant *EmitNullConstant(CodeGenModule &CGM, for (const auto &I : CXXR->vbases()) { const auto *base = I.getType()->castAsCXXRecordDecl(); // Ignore empty bases. - if (isEmptyRecordForLayout(CGM.getContext(), I.getType())) + if (base->isEmpty()) continue; unsigned fieldIndex = layout.getVirtualBaseIndex(base); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5ceaaf30b8d24..75d7718562654 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "CGOpenMPRuntime.h" -#include "ABIInfoImpl.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGDebugInfo.h" @@ -8472,15 +8471,12 @@ class MappableExprsHandler { for (const auto &I : RD->bases()) { if (I.isVirtual()) continue; - - QualType BaseTy = I.getType(); - const auto *Base = BaseTy->getAsCXXRecordDecl(); + const auto *Base = I.getType()->getAsCXXRecordDecl(); // Ignore empty bases. 
- if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) || - CGF.getContext() - .getASTRecordLayout(Base) - .getNonVirtualSize() - .isZero()) + if (Base->isEmpty() || CGF.getContext() + .getASTRecordLayout(Base) + .getNonVirtualSize() + .isZero()) continue; unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); @@ -8488,12 +8484,10 @@ class MappableExprsHandler { } // Fill in virtual bases. for (const auto &I : RD->vbases()) { - QualType BaseTy = I.getType(); + const auto *Base = I.getType()->getAsCXXRecordDecl(); // Ignore empty bases. - if (isEmptyRecordForLayout(CGF.getContext(), BaseTy)) + if (Base->isEmpty()) continue; - - const auto *Base = BaseTy->getAsCXXRecordDecl(); unsigned FieldIndex = RL.getVirtualBaseIndex(Base); if (RecordLayout[FieldIndex]) continue; @@ -8504,8 +8498,7 @@ class MappableExprsHandler { for (const auto *Field : RD->fields()) { // Fill in non-bitfields. (Bitfields always use a zero pattern, which we // will fill in later.) - if (!Field->isBitField() && - !isEmptyFieldForLayout(CGF.getContext(), Field)) { + if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { unsigned FieldIndex = RL.getLLVMFieldNo(Field); RecordLayout[FieldIndex] = Field; } diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index e9205c68c2812..5580cee1f49f6 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -10,9 +10,8 @@ // //===----------------------------------------------------------------------===// -#include "ABIInfoImpl.h" -#include "CGCXXABI.h" #include "CGRecordLayout.h" +#include "CGCXXABI.h" #include "CodeGenTypes.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" @@ -385,7 +384,7 @@ void CGRecordLowering::accumulateFields(bool isNonVirtualBaseType) { Field = accumulateBitFields(isNonVirtualBaseType, Field, FieldEnd); assert((Field == FieldEnd || !Field->isBitField()) && "Failed to accumulate all the bitfields"); - 
} else if (isEmptyFieldForLayout(Context, *Field)) { + } else if (Field->isZeroSize(Context)) { // Empty fields have no storage. ++Field; } else { @@ -634,7 +633,7 @@ CGRecordLowering::accumulateBitFields(bool isNonVirtualBaseType, // non-reusable tail padding. CharUnits LimitOffset; for (auto Probe = Field; Probe != FieldEnd; ++Probe) - if (!isEmptyFieldForLayout(Context, *Probe)) { + if (!Probe->isZeroSize(Context)) { // A member with storage sets the limit. assert((getFieldBitOffset(*Probe) % CharBits) == 0 && "Next storage is not byte-aligned"); @@ -732,7 +731,7 @@ void CGRecordLowering::accumulateBases() { // Bases can be zero-sized even if not technically empty if they // contain only a trailing array member. const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); - if (!isEmptyRecordForLayout(Context, Base.getType()) && + if (!BaseDecl->isEmpty() && !Context.getASTRecordLayout(BaseDecl).getNonVirtualSize().isZero()) Members.push_back(MemberInfo(Layout.getBaseClassOffset(BaseDecl), MemberInfo::Base, getStorageType(BaseDecl), BaseDecl)); @@ -880,7 +879,7 @@ CGRecordLowering::calculateTailClippingOffset(bool isNonVirtualBaseType) const { if (!isNonVirtualBaseType && isOverlappingVBaseABI()) for (const auto &Base : RD->vbases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); - if (isEmptyRecordForLayout(Context, Base.getType())) + if (BaseDecl->isEmpty()) continue; // If the vbase is a primary virtual base of some base, then it doesn't // get its own storage location but instead lives inside of that base. 
@@ -896,7 +895,7 @@ CGRecordLowering::calculateTailClippingOffset(bool isNonVirtualBaseType) const { void CGRecordLowering::accumulateVBases() { for (const auto &Base : RD->vbases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); - if (isEmptyRecordForLayout(Context, Base.getType())) + if (BaseDecl->isEmpty()) continue; CharUnits Offset = Layout.getVBaseClassOffset(BaseDecl); // If the vbase is a primary virtual base of some base, then it doesn't @@ -1157,7 +1156,7 @@ CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) { const FieldDecl *FD = *it; // Ignore zero-sized fields. - if (isEmptyFieldForLayout(getContext(), FD)) + if (FD->isZeroSize(getContext())) continue; // For non-bit-fields, just check that the LLVM struct offset matches the diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index cd08f3ec397a0..d9eabf2c76989 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -15,7 +15,6 @@ //===----------------------------------------------------------------------===// #include "CodeGenTBAA.h" -#include "ABIInfoImpl.h" #include "CGCXXABI.h" #include "CGRecordLayout.h" #include "CodeGenTypes.h" @@ -448,7 +447,7 @@ CodeGenTBAA::CollectFields(uint64_t BaseOffset, unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { - if (isEmptyFieldForLayout(Context, *i)) + if ((*i)->isZeroSize(Context)) continue; uint64_t Offset = diff --git a/clang/test/CodeGen/2009-06-14-anonymous-union-init.c b/clang/test/CodeGen/2009-06-14-anonymous-union-init.c index a4375d7868f01..3f4493deea79e 100644 --- a/clang/test/CodeGen/2009-06-14-anonymous-union-init.c +++ b/clang/test/CodeGen/2009-06-14-anonymous-union-init.c @@ -1,19 +1,8 @@ -// RUN: %clang_cc1 %s -emit-llvm -triple x86_64-linux-gnu -o - | FileCheck %s --check-prefixes=CHECK,EMPTY -// RUN: %clang_cc1 %s -emit-llvm -triple x86_64-windows-msvc -o - | 
FileCheck %s --check-prefixes=CHECK,EMPTY-MSVC +// RUN: %clang_cc1 -emit-llvm < %s | grep "zeroinitializer, i16 16877" // PR4390 struct sysfs_dirent { - union { struct sysfs_elem_dir { int x; } s_dir; }; + union { struct sysfs_elem_dir {} s_dir; }; unsigned short s_mode; }; struct sysfs_dirent sysfs_root = { {}, 16877 }; -// CHECK: @sysfs_root = {{.*}}global { %union.anon, i16, [2 x i8] } { %union.anon zeroinitializer, i16 16877, [2 x i8] zeroinitializer } - -struct Foo { - union { struct empty {} x; }; - unsigned short s_mode; -}; -struct Foo foo = { {}, 16877 }; - -// EMPTY: @foo = {{.*}}global %struct.Foo { i16 16877 } -// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo { [4 x i8] zeroinitializer, i16 16877 } diff --git a/clang/test/CodeGen/X86/x86_64-vaarg.c b/clang/test/CodeGen/X86/x86_64-vaarg.c index 450dfe5d15020..19802eedb02b7 100644 --- a/clang/test/CodeGen/X86/x86_64-vaarg.c +++ b/clang/test/CodeGen/X86/x86_64-vaarg.c @@ -56,8 +56,7 @@ typedef struct { // CHECK: vaarg.end: // CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP1]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 8, i1 false) -// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[COERCE_DIVE]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[RETVAL]], align 8 // CHECK-NEXT: ret double [[TMP3]] // s1 f(int z, ...) 
{ diff --git a/clang/test/CodeGen/paren-list-agg-init.cpp b/clang/test/CodeGen/paren-list-agg-init.cpp index 235352382332a..5c1c598dcd466 100644 --- a/clang/test/CodeGen/paren-list-agg-init.cpp +++ b/clang/test/CodeGen/paren-list-agg-init.cpp @@ -48,13 +48,14 @@ struct E { ~E() {}; }; +// CHECK-DAG: [[STRUCT_F:%.*]] = type { i8 } struct F { F (int i = 1); F (const F &f) = delete; F (F &&f) = default; }; -// CHECK-DAG: [[STRUCT_G:%.*]] = type <{ i32, [4 x i8] }> +// CHECK-DAG: [[STRUCT_G:%.*]] = type <{ i32, [[STRUCT_F]], [3 x i8] }> struct G { int a; F f; @@ -77,12 +78,12 @@ namespace gh61145 { ~Vec(); }; - // CHECK-DAG: [[STRUCT_S1:%.*]] = type { i8 } + // CHECK-DAG: [[STRUCT_S1:%.*]] = type { [[STRUCT_VEC]] } struct S1 { Vec v; }; - // CHECK-DAG: [[STRUCT_S2:%.*]] = type { i8, i8 } + // CHECK-DAG: [[STRUCT_S2:%.*]] = type { [[STRUCT_VEC]], i8 } struct S2 { Vec v; char c; @@ -376,7 +377,7 @@ void foo18() { // CHECK-NEXT: [[G:%.*g.*]] = alloca [[STRUCT_G]], align 4 // CHECK-NEXT: [[A:%.*a.*]] = getelementptr inbounds nuw [[STRUCT_G]], ptr [[G]], i32 0, i32 0 // CHECK-NEXT: store i32 2, ptr [[A]], align 4 -// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds i8, ptr [[G]], i64 4 +// CHECK-NEXT: [[F:%.*f.*]] = getelementptr inbounds nuw [[STRUCT_G]], ptr [[G]], i32 0, i32 1 // CHECk-NEXT: call void @{{.*F.*}}(ptr noundef nonnull align 1 dereferenceable(1)) [[F]], ie32 noundef 1) // CHECK: ret void void foo19() { @@ -391,8 +392,9 @@ namespace gh61145 { // CHECK-NEXT: [[AGG_TMP_ENSURED:%.*agg.tmp.ensured.*]] = alloca [[STRUCT_S1]], align 1 // a.k.a. Vec::Vec() // CHECK-NEXT: call void @_ZN7gh611453VecC1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[V]]) + // CHECK-NEXT: [[V1:%.*v1.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[AGG_TMP_ENSURED]], i32 0, i32 0 // a.k.a. 
Vec::Vec(Vec&&) - // CHECK-NEXT: call void @_ZN7gh611453VecC1EOS0_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_TMP_ENSURED]], ptr noundef nonnull align 1 dereferenceable(1) [[V]]) + // CHECK-NEXT: call void @_ZN7gh611453VecC1EOS0_(ptr noundef nonnull align 1 dereferenceable(1) [[V1]], ptr noundef nonnull align 1 dereferenceable(1) [[V]]) // a.k.a. S1::~S1() // CHECK-NEXT: call void @_ZN7gh611452S1D1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_TMP_ENSURED]]) // a.k.a.Vec::~Vec() @@ -411,8 +413,9 @@ namespace gh61145 { // CHECK-NEXT: [[AGG_TMP_ENSURED:%.*agg.tmp.ensured.*]] = alloca [[STRUCT_S2]], align 1 // a.k.a. Vec::Vec() // CHECK-NEXT: call void @_ZN7gh611453VecC1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[V]]) + // CHECK-NEXT: [[V1:%.*v1.*]] = getelementptr inbounds nuw [[STRUCT_S2]], ptr [[AGG_TMP_ENSURED]], i32 0, i32 0 // a.k.a. Vec::Vec(Vec&&) - // CHECK-NEXT: call void @_ZN7gh611453VecC1EOS0_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_TMP_ENSURED]], ptr noundef nonnull align 1 dereferenceable(1) [[V]]) + // CHECK-NEXT: call void @_ZN7gh611453VecC1EOS0_(ptr noundef nonnull align 1 dereferenceable(1) [[V1]], ptr noundef nonnull align 1 dereferenceable(1) [[V]]) // CHECK-NEXT: [[C:%.*c.*]] = getelementptr inbounds nuw [[STRUCT_S2]], ptr [[AGG_TMP_ENSURED]], i32 0, i32 // CHECK-NEXT: store i8 0, ptr [[C]], align 1 // a.k.a. 
S2::~S2() diff --git a/clang/test/CodeGen/union-init2.c b/clang/test/CodeGen/union-init2.c index ee35e78a4f301..6e039e7e27d53 100644 --- a/clang/test/CodeGen/union-init2.c +++ b/clang/test/CodeGen/union-init2.c @@ -13,7 +13,7 @@ union z { }; union z y = {}; -// CHECK: @foo = {{.*}}global %union.Foo undef, align 1 +// CHECK: @foo = {{.*}}global %union.Foo zeroinitializer, align 1 // CHECK-CXX: @foo = {{.*}}global %union.Foo undef, align 1 union Foo { struct Empty {} val; diff --git a/clang/test/CodeGen/voidptr-vaarg.c b/clang/test/CodeGen/voidptr-vaarg.c index a0211642bd82f..9551418fe9258 100644 --- a/clang/test/CodeGen/voidptr-vaarg.c +++ b/clang/test/CodeGen/voidptr-vaarg.c @@ -245,8 +245,7 @@ typedef struct { // CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 // CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false) -// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_EMPTY_INT_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4 // CHECK-NEXT: ret i32 [[TMP0]] // empty_int_t empty_int(__builtin_va_list list) { diff --git a/clang/test/CodeGenCXX/2011-12-19-init-list-ctor.cpp b/clang/test/CodeGenCXX/2011-12-19-init-list-ctor.cpp index 3efb8c449c8fa..8922591f8e6f1 100644 --- a/clang/test/CodeGenCXX/2011-12-19-init-list-ctor.cpp +++ b/clang/test/CodeGenCXX/2011-12-19-init-list-ctor.cpp @@ -19,8 +19,8 @@ struct S { }; // CHECK: store i32 0, ptr @arr -// CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds (i8, ptr @arr, i64 4), ptr noundef @.str) +// CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds nuw (%struct.S, ptr @arr, i32 0, i32 1), ptr noundef @.str) // CHECK: store i32 1, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 1) -// CHECK: 
call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds (i8, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 1), i64 4), ptr noundef @.str.1) +// CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds nuw (%struct.S, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 1), i32 0, i32 1), ptr noundef @.str.1) // CHECK: store i32 2, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 2) -// CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds (i8, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 2), i64 4), ptr noundef @.str.2) +// CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds nuw (%struct.S, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 2), i32 0, i32 1), ptr noundef @.str.2) diff --git a/clang/test/CodeGenCXX/bitfield-access-empty.cpp b/clang/test/CodeGenCXX/bitfield-access-empty.cpp index d1ae12e202cda..a06f62b3eb05a 100644 --- a/clang/test/CodeGenCXX/bitfield-access-empty.cpp +++ b/clang/test/CodeGenCXX/bitfield-access-empty.cpp @@ -84,8 +84,8 @@ struct P3 { unsigned b : 16; } p3; // CHECK-LABEL: LLVMType:%struct.P3 = -// LAYOUT-SAME: type { i16, [2 x i8], i16, [2 x i8] } -// LAYOUT-DWN32-SAME: type <{ i16, i8, i16 }> +// LAYOUT-SAME: type { i16, %struct.Empty, i16, [2 x i8] } +// LAYOUT-DWN32-SAME: type <{ i16, %struct.Empty, i16 }> // CHECK-NEXT: NonVirtualBaseLLVMType:%struct.P3 = // CHECK: BitFields:[ // LAYOUT-NEXT: diff --git a/clang/test/CodeGenCXX/class-layout.cpp b/clang/test/CodeGenCXX/class-layout.cpp index 90617d25b254e..84b0f887876ac 100644 --- a/clang/test/CodeGenCXX/class-layout.cpp +++ b/clang/test/CodeGenCXX/class-layout.cpp @@ -83,7 +83,7 @@ namespace Test6 { namespace Test7 { #pragma pack (1) class A {}; - // CHECK: %"class.Test7::B" = type <{ ptr, i8 }> + // CHECK: %"class.Test7::B" = type <{ ptr, %"class.Test7::A" }> class B { virtual ~B(); A a; diff --git a/clang/test/CodeGenCXX/compound-literals.cpp b/clang/test/CodeGenCXX/compound-literals.cpp index 
1b4a1d4445123..fcec2d19e2def 100644 --- a/clang/test/CodeGenCXX/compound-literals.cpp +++ b/clang/test/CodeGenCXX/compound-literals.cpp @@ -20,7 +20,7 @@ int f() { // CHECK: [[LVALUE:%[a-z0-9.]+]] = alloca // CHECK-NEXT: [[I:%[a-z0-9]+]] = getelementptr inbounds {{.*}}, ptr [[LVALUE]], i32 0, i32 0 // CHECK-NEXT: store i32 17, ptr [[I]] - // CHECK-NEXT: [[X:%[a-z0-9]+]] = getelementptr inbounds {{.*}} [[LVALUE]], i32 4 + // CHECK-NEXT: [[X:%[a-z0-9]+]] = getelementptr inbounds {{.*}} [[LVALUE]], i32 0, i32 1 // CHECK-NEXT: call noundef ptr @_ZN1XC1EPKc({{.*}}[[X]] // CHECK-NEXT: [[I:%[a-z0-9]+]] = getelementptr inbounds {{.*}} [[LVALUE]], i32 0, i32 0 // CHECK-NEXT: [[RESULT:%[a-z0-9]+]] = load i32, ptr diff --git a/clang/test/CodeGenCXX/exceptions.cpp b/clang/test/CodeGenCXX/exceptions.cpp index 61cffd1023b88..9875740c09b41 100644 --- a/clang/test/CodeGenCXX/exceptions.cpp +++ b/clang/test/CodeGenCXX/exceptions.cpp @@ -513,7 +513,8 @@ namespace test11 { // CHECK-LABEL: define{{.*}} void @_ZN6test111CC2Ev( // CHECK: [[THIS:%.*]] = load ptr, ptr {{%.*}} // Construct single. - // CHECK-NEXT: call void @_ZN6test111AC1Ev(ptr {{[^,]*}} [[THIS]]) + // CHECK-NEXT: [[SINGLE:%.*]] = getelementptr inbounds nuw [[C:%.*]], ptr [[THIS]], i32 0, i32 0 + // CHECK-NEXT: call void @_ZN6test111AC1Ev(ptr {{[^,]*}} [[SINGLE]]) // Construct array. // CHECK-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw [[C:%.*]], ptr [[THIS]], i32 0, i32 1 // CHECK-NEXT: [[ARRAYBEGIN:%.*]] = getelementptr inbounds [2 x [3 x [[A:%.*]]]], ptr [[ARRAY]], i32 0, i32 0, i32 0 @@ -559,8 +560,8 @@ namespace test11 { // CHECK: br label // Finally, the cleanup for single. 
- // CHECK98: invoke void @_ZN6test111AD1Ev(ptr {{[^,]*}} [[THIS]]) - // CHECK11: call void @_ZN6test111AD1Ev(ptr {{[^,]*}} [[THIS]]) + // CHECK98: invoke void @_ZN6test111AD1Ev(ptr {{[^,]*}} [[SINGLE]]) + // CHECK11: call void @_ZN6test111AD1Ev(ptr {{[^,]*}} [[SINGLE]]) // CHECK: br label // CHECK: resume diff --git a/clang/test/CodeGenCXX/lambda-deterministic-captures.cpp b/clang/test/CodeGenCXX/lambda-deterministic-captures.cpp index ab44f43720832..ef3847d0c1e93 100644 --- a/clang/test/CodeGenCXX/lambda-deterministic-captures.cpp +++ b/clang/test/CodeGenCXX/lambda-deterministic-captures.cpp @@ -16,7 +16,8 @@ void foo() { } // CHECK: define{{.*}} void @_Z3foov -// CHECK: getelementptr inbounds nuw %{{.+}}, ptr %{{.+}}, i32 0, i32 1 +// CHECK: getelementptr inbounds nuw %{{.+}}, ptr %{{.+}}, i32 0, i32 0 +// CHECK-NEXT: getelementptr inbounds nuw %{{.+}}, ptr %{{.+}}, i32 0, i32 1 // CHECK-NEXT: store float 0.000 // CHECK-NEXT: getelementptr inbounds nuw %{{.+}}, ptr %{{.+}}, i32 0, i32 2 // CHECK-NEXT: store float 1.000 @@ -26,6 +27,7 @@ void foo() { // The lambda body. Reverse iteration when the captures aren't deterministic // causes these to be laid out differently in the lambda. 
// CHECK: define internal void +// CHECK: getelementptr inbounds nuw %{{.+}}, ptr %{{.+}}, i32 0, i32 0 // CHECK: getelementptr inbounds nuw %{{.+}}, ptr %{{.+}}, i32 0, i32 1 // CHECK: getelementptr inbounds nuw %{{.+}}, ptr %{{.+}}, i32 0, i32 2 // CHECK: getelementptr inbounds nuw %{{.+}}, ptr %{{.+}}, i32 0, i32 3 diff --git a/clang/test/CodeGenCXX/partial-destruction.cpp b/clang/test/CodeGenCXX/partial-destruction.cpp index 548a9f154be9e..5412e1ddd6274 100644 --- a/clang/test/CodeGenCXX/partial-destruction.cpp +++ b/clang/test/CodeGenCXX/partial-destruction.cpp @@ -107,12 +107,13 @@ namespace test1 { // CHECK: [[V:%.*]] = alloca [[B:%.*]], align 4 // CHECK-NEXT: alloca ptr // CHECK-NEXT: alloca i32 - // CHECK-NEXT: call void @_ZN5test11AC1Ei(ptr {{[^,]*}} [[V]], i32 noundef 5) - // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 1 + // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[B]], ptr [[V]], i32 0, i32 0 + // CHECK-NEXT: call void @_ZN5test11AC1Ei(ptr {{[^,]*}} [[X]], i32 noundef 5) + // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[B]], ptr [[V]], i32 0, i32 1 // CHECK-NEXT: invoke void @_ZN5test11AC1Ei(ptr {{[^,]*}} [[Y]], i32 noundef 6) - // CHECK: [[Z:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 2 + // CHECK: [[Z:%.*]] = getelementptr inbounds nuw [[B]], ptr [[V]], i32 0, i32 2 // CHECK-NEXT: invoke void @_ZN5test11AC1Ei(ptr {{[^,]*}} [[Z]], i32 noundef 7) - // CHECK: [[W:%.*]] = getelementptr inbounds nuw [[B]], ptr [[V]], i32 0, i32 1 + // CHECK: [[W:%.*]] = getelementptr inbounds nuw [[B]], ptr [[V]], i32 0, i32 3 // CHECK-NEXT: store i32 8, ptr [[W]], align 4 // CHECK-NEXT: call void @_ZN5test11BD1Ev(ptr {{[^,]*}} [[V]]) // CHECK-NEXT: ret void @@ -123,9 +124,9 @@ namespace test1 { // CHECK: landingpad { ptr, i32 } // CHECK-NEXT: cleanup // CHECKv03: invoke void @_ZN5test11AD1Ev(ptr {{[^,]*}} [[Y]]) - // CHECKv03: invoke void @_ZN5test11AD1Ev(ptr {{[^,]*}} [[V]]) + // CHECKv03: invoke void @_ZN5test11AD1Ev(ptr 
{{[^,]*}} [[X]]) // CHECKv11: call void @_ZN5test11AD1Ev(ptr {{[^,]*}} [[Y]]) - // CHECKv11: call void @_ZN5test11AD1Ev(ptr {{[^,]*}} [[V]]) + // CHECKv11: call void @_ZN5test11AD1Ev(ptr {{[^,]*}} [[X]]) } namespace test2 { diff --git a/clang/test/CodeGenCXX/pod-member-memcpys.cpp b/clang/test/CodeGenCXX/pod-member-memcpys.cpp index 8efec6184a3da..16d3d45a8179b 100644 --- a/clang/test/CodeGenCXX/pod-member-memcpys.cpp +++ b/clang/test/CodeGenCXX/pod-member-memcpys.cpp @@ -1,8 +1,6 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-apple-darwin10 -emit-llvm -std=c++03 -fexceptions -fcxx-exceptions -o - %s | FileCheck %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple i386-apple-darwin10 -emit-llvm -std=c++03 -o - %s | FileCheck --check-prefix=CHECK-2 %s -struct Empty {}; - struct POD { int w, x, y, z; }; @@ -108,20 +106,6 @@ struct __attribute__((packed)) PackedMembers { int w, x, y, z; }; -struct WithEmptyField { - int a; - Empty e; - NonPOD np; - int b; -}; - -struct WithEmptyNUAField { - int a; - [[no_unique_address]] Empty e; - NonPOD np; - int b; -}; - // COPY-ASSIGNMENT OPERATORS: // Assignment operators are output in the order they're encountered. 
@@ -137,8 +121,6 @@ CALL_AO(VolatileMember) CALL_AO(BitfieldMember) CALL_AO(InnerClassMember) CALL_AO(PackedMembers) -CALL_AO(WithEmptyField) -CALL_AO(WithEmptyNUAField) // Basic copy-assignment: // CHECK-LABEL: define linkonce_odr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN5BasicaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) @@ -203,18 +185,6 @@ CALL_AO(WithEmptyNUAField) // CHECK: call void @llvm.memcpy.p0.p0.i64({{.*}} align 1 {{.*}} align 1 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret ptr -// WithEmptyField copy-assignment: -// CHECK-LABEL: define linkonce_odr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14WithEmptyFieldaSERKS_ -// CHECK: call void @llvm.memcpy.p0.p0.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 4, i1 {{.*}}) -// CHECK: call nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN6NonPODaSERKS_ -// CHECK: ret ptr - -// WithEmptyNUAField copy-assignment: -// CHECK-LABEL: define linkonce_odr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN17WithEmptyNUAFieldaSERKS_ -// CHECK: call void @llvm.memcpy.p0.p0.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 4, i1 {{.*}}) -// CHECK: call nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN6NonPODaSERKS_ -// CHECK: ret ptr - // COPY-CONSTRUCTORS: // Clang outputs copy-constructors in the reverse of the order that @@ -310,15 +280,3 @@ CALL_CC(Basic) // CHECK: call void @_ZN6NonPODC1ERKS_ // CHECK: call void @llvm.memcpy.p0.p0.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret void - -CALL_CC(WithEmptyField) -// WithEmptyField copy-constructor: -// CHECK-LABEL: define linkonce_odr void @_ZN14WithEmptyFieldC2ERKS_ -// CHECK: call void @llvm.memcpy.p0.p0.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 4, i1 {{.*}}) -// CHECK: call void @_ZN6NonPODC1ERKS_ - -CALL_CC(WithEmptyNUAField) -// WithEmptyNUAField copy-constructor: -// CHECK-LABEL: define linkonce_odr void @_ZN17WithEmptyNUAFieldC2ERKS_(ptr 
{{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// CHECK: call void @llvm.memcpy.p0.p0.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 4, i1 {{.*}}) -// CHECK: call void @_ZN6NonPODC1ERKS_ diff --git a/clang/test/CodeGenCXX/pr18962.cpp b/clang/test/CodeGenCXX/pr18962.cpp index 9ac87003c94c5..b564a7b9a73af 100644 --- a/clang/test/CodeGenCXX/pr18962.cpp +++ b/clang/test/CodeGenCXX/pr18962.cpp @@ -23,6 +23,7 @@ D p3; // We end up using an opaque type for 'append' to avoid circular references. // CHECK: %class.A = type { ptr } -// CHECK: %class.C = type <{ ptr, [4 x i8] }> +// CHECK: %class.C = type <{ ptr, %class.B, [3 x i8] }> +// CHECK: %class.B = type { i8 } // CHECK: %class.D = type { %class.C.base, [3 x i8] } -// CHECK: %class.C.base = type <{ ptr, i8 }> +// CHECK: %class.C.base = type <{ ptr, %class.B }> diff --git a/clang/test/CodeGenCXX/references.cpp b/clang/test/CodeGenCXX/references.cpp index b84cb788d161c..0fca5e76659c2 100644 --- a/clang/test/CodeGenCXX/references.cpp +++ b/clang/test/CodeGenCXX/references.cpp @@ -191,6 +191,7 @@ namespace N2 { // CHECK-LABEL: define{{.*}} void @_ZN2N21fEi // CHECK: call void @_ZN2N24getPEv + // CHECK: getelementptr inbounds // CHECK: store i32 17 // CHECK: call void @_ZN2N21PD1Ev void f(int i) { @@ -219,7 +220,8 @@ namespace N2 { // CHECK-LABEL: define{{.*}} void @_ZN2N21gEi // CHECK: call void @_ZN2N24getZEv - // CHECK: {{getelementptr inbounds.*i64 16}} + // CHECK: {{getelementptr inbounds.*i32 0, i32 0}} + // CHECK: {{getelementptr inbounds.*i32 0, i32 0}} // CHECK: store i32 19 // CHECK: call void @_ZN2N21ZD1Ev // CHECK: ret void diff --git a/clang/test/CodeGenCXX/temporaries.cpp b/clang/test/CodeGenCXX/temporaries.cpp index 44978dd403ad9..36ab0e89f7d50 100644 --- a/clang/test/CodeGenCXX/temporaries.cpp +++ b/clang/test/CodeGenCXX/temporaries.cpp @@ -714,7 +714,7 @@ namespace MultipleExtension { // CHECK: call i32 @__cxa_atexit({{.*}} @_ZN17MultipleExtension1AD1Ev, {{.*}} @[[TEMPA]] // 
CHECK: store {{.*}} @[[TEMPA]], {{.*}} @[[TEMPE:_ZGRN17MultipleExtension2e1E.*]], - // CHECK: call void @_ZN17MultipleExtension1BC1Ev({{.*}} getelementptr inbounds ({{.*}} @[[TEMPE]], i64 8)) + // CHECK: call void @_ZN17MultipleExtension1BC1Ev({{.*}} getelementptr inbounds nuw ({{.*}} @[[TEMPE]], i32 0, i32 1)) // CHECK: call void @_ZN17MultipleExtension1DC1Ev({{.*}} @[[TEMPD:_ZGRN17MultipleExtension2e1E.*]]) // CHECK: call i32 @__cxa_atexit({{.*}} @_ZN17MultipleExtension1DD1Ev, {{.*}} @[[TEMPD]] @@ -728,7 +728,7 @@ namespace MultipleExtension { // CHECK: call i32 @__cxa_atexit({{.*}} @_ZN17MultipleExtension1AD1Ev, {{.*}} @[[TEMPA]] // CHECK: store {{.*}} @[[TEMPA]], {{.*}} @[[E:_ZN17MultipleExtension2e2E]] - // CHECK: call void @_ZN17MultipleExtension1BC1Ev({{.*}} getelementptr inbounds ({{.*}} @[[E]], i64 8)) + // CHECK: call void @_ZN17MultipleExtension1BC1Ev({{.*}} getelementptr inbounds nuw ({{.*}} @[[E]], i32 0, i32 1)) // CHECK: call void @_ZN17MultipleExtension1DC1Ev({{.*}} @[[TEMPD:_ZGRN17MultipleExtension2e2E.*]]) // CHECK: call i32 @__cxa_atexit({{.*}} @_ZN17MultipleExtension1DD1Ev, {{.*}} @[[TEMPD]] @@ -743,11 +743,11 @@ namespace MultipleExtension { // CHECK: %[[TEMPE1_A:.*]] = getelementptr inbounds {{.*}} %[[TEMPE1:.*]], i32 0, i32 0 // CHECK: call void @[[NS]]1AC1Ev({{.*}} %[[TEMPA1:.*]]) // CHECK: store {{.*}} %[[TEMPA1]], {{.*}} %[[TEMPE1_A]] - // CHECK: %[[TEMPE1_B:.*]] = getelementptr inbounds {{.*}} %[[TEMPE1]], i64 8 + // CHECK: %[[TEMPE1_B:.*]] = getelementptr inbounds {{.*}} %[[TEMPE1]], i32 0, i32 1 // CHECK: call void @[[NS]]1BC1Ev({{.*}} %[[TEMPE1_B]]) // CHECK: %[[TEMPE1_C:.*]] = getelementptr inbounds {{.*}} %[[TEMPE1]], i32 0, i32 2 // CHECK: call void @[[NS]]1DC1Ev({{.*}} %[[TEMPD1:.*]]) - // CHECK: %[[TEMPD1_C:.*]] = getelementptr inbounds {{.*}} %[[TEMPD1]], i64 4 + // CHECK: %[[TEMPD1_C:.*]] = getelementptr inbounds {{.*}} %[[TEMPD1]], i32 0, i32 1 // CHECK: store {{.*}} %[[TEMPD1_C]], {{.*}} %[[TEMPE1_C]] // CHECK: store {{.*}} 
%[[TEMPE1]], {{.*}} %[[E1:.*]] @@ -758,11 +758,11 @@ namespace MultipleExtension { // CHECK: %[[TEMPE2_A:.*]] = getelementptr inbounds {{.*}} %[[E2:.*]], i32 0, i32 0 // CHECK: call void @[[NS]]1AC1Ev({{.*}} %[[TEMPA2:.*]]) // CHECK: store {{.*}} %[[TEMPA2]], {{.*}} %[[TEMPE2_A]] - // CHECK: %[[TEMPE2_B:.*]] = getelementptr inbounds {{.*}} %[[E2]], i64 8 + // CHECK: %[[TEMPE2_B:.*]] = getelementptr inbounds {{.*}} %[[E2]], i32 0, i32 1 // CHECK: call void @[[NS]]1BC1Ev({{.*}} %[[TEMPE2_B]]) // CHECK: %[[TEMPE2_C:.*]] = getelementptr inbounds {{.*}} %[[E2]], i32 0, i32 2 // CHECK: call void @[[NS]]1DC1Ev({{.*}} %[[TEMPD2:.*]]) - // CHECK: %[[TEMPD2_C:.*]] = getelementptr inbounds {{.*}} %[[TEMPD2]], i64 4 + // CHECK: %[[TEMPD2_C:.*]] = getelementptr inbounds {{.*}} %[[TEMPD2]], i32 0, i32 1 // CHECK: store {{.*}} %[[TEMPD2_C]], ptr %[[TEMPE2_C]] g(); diff --git a/clang/test/CodeGenObjCXX/lambda-to-block.mm b/clang/test/CodeGenObjCXX/lambda-to-block.mm index a8657ca711f7c..b1e1338c6ac1e 100644 --- a/clang/test/CodeGenObjCXX/lambda-to-block.mm +++ b/clang/test/CodeGenObjCXX/lambda-to-block.mm @@ -2,10 +2,11 @@ // Shouldn't crash! 
-// CHECK: %[[CLASS_ANON:.*]] = type { i8 } -// CHECK: %[[CLASS_ANON_0:.*]] = type { i8 } -// CHECK: %[[CLASS_ANON_1:.*]] = type { i8 } -// CHECK: %[[CLASS_ANON_2:.*]] = type { i8 } +// CHECK: %[[CLASS_ANON:.*]] = type { %[[STRUCT_COPYABLE:.*]] } +// CHECK: %[[STRUCT_COPYABLE]] = type { i8 } +// CHECK: %[[CLASS_ANON_0:.*]] = type { %[[STRUCT_COPYABLE]] } +// CHECK: %[[CLASS_ANON_1:.*]] = type { %[[STRUCT_COPYABLE]] } +// CHECK: %[[CLASS_ANON_2:.*]] = type { %[[STRUCT_COPYABLE]] } // CHECK: @[[BLOCK_DESC0:.*]] = internal constant { i64, i64, ptr, ptr, ptr, ptr } { i64 0, i64 33, ptr @[[COPY_HELPER0:.*__copy_helper_block_.*]], ptr @__destroy_helper_block{{.*}}, {{.*}}}, align 8 // CHECK: @[[BLOCK_DESC1:.*]] = internal constant { i64, i64, ptr, ptr, ptr, ptr } { i64 0, i64 33, ptr @[[COPY_HELPER1:.*__copy_helper_block_.*]], ptr @__destroy_helper_block{{.*}}, {{.*}}}, align 8 diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp index fc6f7c15eb5e6..81d0cce5190e7 100644 --- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -19,8 +19,9 @@ E::E() noexcept : foo(s()) {} // CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5) -// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[FOO:%.*]] = getelementptr inbounds nuw [[STRUCT_E:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[FOO_ASCAST:%.*]] = addrspacecast ptr [[FOO]] to ptr addrspace(5) +// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[FOO_ASCAST]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // // diff --git 
a/clang/test/OpenMP/irbuilder_for_iterator.cpp b/clang/test/OpenMP/irbuilder_for_iterator.cpp index ec1c3af744b49..e1e8ff66cd8aa 100644 --- a/clang/test/OpenMP/irbuilder_for_iterator.cpp +++ b/clang/test/OpenMP/irbuilder_for_iterator.cpp @@ -48,48 +48,49 @@ extern "C" void workshareloop_iterator(float *a, float *b, float *c) { // CHECK-NEXT: call void @_ZN10MyIteratorC1Ej(ptr noundef nonnull align 1 dereferenceable(1) [[IT]], i32 noundef 7) // CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store ptr [[IT]], ptr [[TMP0]], align 8 -// CHECK-NEXT: call void @_ZN10MyIteratorC1ERKS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_CAPTURED1]], ptr noundef nonnull align 1 dereferenceable(1) [[IT]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: call void @_ZN10MyIteratorC1ERKS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP1]], ptr noundef nonnull align 1 dereferenceable(1) [[IT]]) // CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) // CHECK-NEXT: [[DOTCOUNT:%.*]] = load i64, ptr [[DOTCOUNT_ADDR]], align 8 // CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] // CHECK: omp_loop.preheader: // CHECK-NEXT: store i64 0, ptr [[P_LOWERBOUND]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DOTCOUNT]], 1 -// CHECK-NEXT: store i64 [[TMP1]], ptr [[P_UPPERBOUND]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DOTCOUNT]], 1 +// CHECK-NEXT: store i64 [[TMP2]], ptr [[P_UPPERBOUND]], align 8 // CHECK-NEXT: store i64 1, ptr [[P_STRIDE]], align 8 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i64 1, i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = load 
i64, ptr [[P_LOWERBOUND]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[P_UPPERBOUND]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[P_LOWERBOUND]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[P_UPPERBOUND]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[TMP3]] +// CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1 // CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] // CHECK: omp_loop.header: // CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i64 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] // CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] // CHECK: omp_loop.cond: -// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i64 [[OMP_LOOP_IV]], [[TMP5]] +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i64 [[OMP_LOOP_IV]], [[TMP6]] // CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] // CHECK: omp_loop.body: -// CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OMP_LOOP_IV]], [[TMP2]] -// CHECK-NEXT: call void @__captured_stmt.1(ptr [[IT]], i64 [[TMP6]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OMP_LOOP_IV]], [[TMP3]] +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[IT]], i64 [[TMP7]], ptr [[AGG_CAPTURED1]]) // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_ZNK10MyIteratordeEv(ptr noundef nonnull align 1 dereferenceable(1) [[IT]]) // CHECK-NEXT: store i32 [[CALL]], ptr [[I]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// 
CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM2]] -// CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP9]], [[TMP12]] -// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP10]], [[TMP13]] +// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM4]] // CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX5]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC]] // CHECK: omp_loop.inc: @@ -154,11 +155,12 @@ extern "C" void workshareloop_iterator(float *a, float *b, float *c) { // CHECK-NEXT: store i64 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 8 // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], 
align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[LOGICAL_ADDR]], align 8 -// CHECK-NEXT: [[MUL:%.*]] = mul i64 1, [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[LOGICAL_ADDR]], align 8 +// CHECK-NEXT: [[MUL:%.*]] = mul i64 1, [[TMP2]] // CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[MUL]] to i32 -// CHECK-NEXT: call void @_ZNK10MyIteratorplEj(ptr dead_on_unwind writable sret([[STRUCT_MYITERATOR]]) align 1 [[REF_TMP]], ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]], i32 noundef [[CONV]]) -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 1 dereferenceable(1) ptr @_ZN10MyIteratoraSERKS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP2]], ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// CHECK-NEXT: call void @_ZNK10MyIteratorplEj(ptr dead_on_unwind writable sret([[STRUCT_MYITERATOR]]) align 1 [[REF_TMP]], ptr noundef nonnull align 1 dereferenceable(1) [[TMP1]], i32 noundef [[CONV]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 1 dereferenceable(1) ptr @_ZN10MyIteratoraSERKS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP3]], ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_for_rangefor.cpp b/clang/test/OpenMP/irbuilder_for_rangefor.cpp index 86a043e638bc3..635382f737f18 100644 --- a/clang/test/OpenMP/irbuilder_for_rangefor.cpp +++ b/clang/test/OpenMP/irbuilder_for_rangefor.cpp @@ -66,46 +66,47 @@ extern "C" void workshareloop_rangefor(float *a, float *b, float *c) { // CHECK-NEXT: store ptr [[__BEGIN2]], ptr [[TMP2]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr 
[[AGG_CAPTURED]], i32 0, i32 1 // CHECK-NEXT: store ptr [[__END2]], ptr [[TMP3]], align 8 -// CHECK-NEXT: call void @_ZN10MyIteratorC1ERKS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_CAPTURED1]], ptr noundef nonnull align 1 dereferenceable(1) [[__BEGIN2]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: call void @_ZN10MyIteratorC1ERKS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP4]], ptr noundef nonnull align 1 dereferenceable(1) [[__BEGIN2]]) // CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) // CHECK-NEXT: [[DOTCOUNT:%.*]] = load i64, ptr [[DOTCOUNT_ADDR]], align 8 // CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] // CHECK: omp_loop.preheader: // CHECK-NEXT: store i64 0, ptr [[P_LOWERBOUND]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[DOTCOUNT]], 1 -// CHECK-NEXT: store i64 [[TMP4]], ptr [[P_UPPERBOUND]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[DOTCOUNT]], 1 +// CHECK-NEXT: store i64 [[TMP5]], ptr [[P_UPPERBOUND]], align 8 // CHECK-NEXT: store i64 1, ptr [[P_STRIDE]], align 8 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i64 1, i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[P_LOWERBOUND]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P_UPPERBOUND]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[TMP5]] -// CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P_LOWERBOUND]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[P_UPPERBOUND]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], [[TMP6]] +// CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1 // CHECK-NEXT: br label 
[[OMP_LOOP_HEADER:%.*]] // CHECK: omp_loop.header: // CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i64 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] // CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] // CHECK: omp_loop.cond: -// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i64 [[OMP_LOOP_IV]], [[TMP8]] +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i64 [[OMP_LOOP_IV]], [[TMP9]] // CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] // CHECK: omp_loop.body: -// CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OMP_LOOP_IV]], [[TMP5]] -// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i64 [[TMP9]], ptr [[AGG_CAPTURED1]]) -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM2]] -// CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP12]], [[TMP15]] -// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OMP_LOOP_IV]], [[TMP6]] +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i64 [[TMP10]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP13]], [[TMP16]] +// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM4]] // CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX5]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC]] // CHECK: omp_loop.inc: @@ -172,12 +173,13 @@ extern "C" void workshareloop_rangefor(float *a, float *b, float *c) { // CHECK-NEXT: store i64 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 8 // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[LOGICAL_ADDR]], align 8 -// CHECK-NEXT: [[MUL:%.*]] = mul i64 1, [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[LOGICAL_ADDR]], align 8 +// CHECK-NEXT: [[MUL:%.*]] = mul i64 1, [[TMP2]] // CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[MUL]] to i32 -// CHECK-NEXT: call void @_ZNK10MyIteratorplEj(ptr dead_on_unwind writable sret([[STRUCT_MYITERATOR]]) align 1 [[REF_TMP]], ptr noundef nonnull align 1 dereferenceable(1) 
[[TMP0]], i32 noundef [[CONV]]) +// CHECK-NEXT: call void @_ZNK10MyIteratorplEj(ptr dead_on_unwind writable sret([[STRUCT_MYITERATOR]]) align 1 [[REF_TMP]], ptr noundef nonnull align 1 dereferenceable(1) [[TMP1]], i32 noundef [[CONV]]) // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_ZNK10MyIteratordeEv(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 [[CALL]], ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[CALL]], ptr [[TMP3]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/task_member_call_codegen.cpp b/clang/test/OpenMP/task_member_call_codegen.cpp index a6ae29c1f9f6d..8f7d2d15d0e26 100644 --- a/clang/test/OpenMP/task_member_call_codegen.cpp +++ b/clang/test/OpenMP/task_member_call_codegen.cpp @@ -32,8 +32,9 @@ void c() { // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry.) 
// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK1-NEXT: ret void // // @@ -45,8 +46,9 @@ void c() { // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK1-NEXT: ret void // // @@ -70,7 +72,7 @@ void c() { // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 40 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) @@ -98,9 +100,10 @@ void c() { // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK3-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1, i64 48, i64 1, ptr @.omp_task_entry.) // CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 40 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], ptr [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -112,8 +115,9 @@ void c() { // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK3-NEXT: ret void // // @@ -137,7 +141,7 @@ void c() { // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // 
CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 40 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 06c827c41eacc..5bcae291428f6 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -1399,7 +1399,10 @@ DECLARE_REAL(hsa_status_t, hsa_amd_ipc_memory_attach, DECLARE_REAL(hsa_status_t, hsa_amd_ipc_memory_detach, void *mapped_ptr) DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_reserve_align, void** ptr, size_t size, uint64_t address, uint64_t alignment, uint64_t flags) -DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size); +DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size) +DECLARE_REAL(hsa_status_t, hsa_amd_pointer_info, const void* ptr, + hsa_amd_pointer_info_t* info, void* (*alloc)(size_t), + uint32_t* num_agents_accessible, hsa_agent_t** accessible) namespace __asan { @@ -1452,18 +1455,22 @@ static struct AP64 AP_; static struct AP32 AP_; #endif -hsa_status_t asan_hsa_amd_ipc_memory_create(void *ptr, size_t len, - hsa_amd_ipc_memory_t * handle) { - void *ptr_; - size_t len_ = get_allocator().GetActuallyAllocatedSize(ptr); - if (len_) { +hsa_status_t asan_hsa_amd_ipc_memory_create(void* ptr, size_t len, + hsa_amd_ipc_memory_t* handle) { + void* ptr_ = get_allocator().GetBlockBegin(ptr); + AsanChunk* m = ptr_ + ? 
instance.GetAsanChunkByAddr(reinterpret_cast<uptr>(ptr_)) + : nullptr; + if (ptr_ && m) { static_assert(AP_.kMetadataSize == 0, "Expression below requires this"); - ptr_ = reinterpret_cast<void*>(reinterpret_cast<uptr>(ptr) - kPageSize_); - } else { - ptr_ = ptr; - len_ = len; + uptr p = reinterpret_cast<uptr>(ptr); + uptr p_ = reinterpret_cast<uptr>(ptr_); + if (p == p_ + kPageSize_ && len == m->UsedSize()) { + size_t len_ = get_allocator().GetActuallyAllocatedSize(ptr_); + return REAL(hsa_amd_ipc_memory_create)(ptr_, len_, handle); + } } - return REAL(hsa_amd_ipc_memory_create)(ptr_, len_, handle); + return REAL(hsa_amd_ipc_memory_create)(ptr, len, handle); } hsa_status_t asan_hsa_amd_ipc_memory_attach(const hsa_amd_ipc_memory_t *handle, @@ -1540,5 +1547,36 @@ hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size, } return REAL(hsa_amd_vmem_address_free)(ptr, size); } + +hsa_status_t asan_hsa_amd_pointer_info(const void* ptr, + hsa_amd_pointer_info_t* info, + void* (*alloc)(size_t), + uint32_t* num_agents_accessible, + hsa_agent_t** accessible) { + void* ptr_ = get_allocator().GetBlockBegin(ptr); + AsanChunk* m = ptr_ + ? instance.GetAsanChunkByAddr(reinterpret_cast<uptr>(ptr_)) + : nullptr; + if (ptr_ && m) { + hsa_status_t status = REAL(hsa_amd_pointer_info)( + ptr_, info, alloc, num_agents_accessible, accessible); + if (status == HSA_STATUS_SUCCESS && info) { + static_assert(AP_.kMetadataSize == 0, "Expression below requires this"); + // Adjust base address of agent,host and sizeInBytes so as to return + // the actual pointer information of user allocation rather than asan + // allocation.
 Asan allocation pointer info can be acquired using internal + 'GetPointerInfo' + info->agentBaseAddress = reinterpret_cast<void*>( + reinterpret_cast<uptr>(info->agentBaseAddress) + kPageSize_); + info->hostBaseAddress = reinterpret_cast<void*>( + reinterpret_cast<uptr>(info->hostBaseAddress) + kPageSize_); + info->sizeInBytes = m->UsedSize(); + } + return status; + } + return REAL(hsa_amd_pointer_info)(ptr, info, alloc, num_agents_accessible, + accessible); +} + } // namespace __asan #endif diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index ced10f62b7a58..f33e8d3b2819e 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -341,6 +341,11 @@ hsa_status_t asan_hsa_amd_vmem_address_reserve_align(void** ptr, size_t size, BufferedStackTrace* stack); hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size, BufferedStackTrace* stack); +hsa_status_t asan_hsa_amd_pointer_info(const void* ptr, + hsa_amd_pointer_info_t* info, + void* (*alloc)(size_t), + uint32_t* num_agents_accessible, + hsa_agent_t** accessible); } // namespace __asan #endif diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index 0951a77b1b93e..c04d532f909b1 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -948,6 +948,15 @@ INTERCEPTOR(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size) { return asan_hsa_amd_vmem_address_free(ptr, size, &stack); } +INTERCEPTOR(hsa_status_t, hsa_amd_pointer_info, const void* ptr, + hsa_amd_pointer_info_t* info, void* (*alloc)(size_t), + uint32_t* num_agents_accessible, hsa_agent_t** accessible) { + AsanInitFromRtl(); + ENSURE_HSA_INITED(); + return asan_hsa_amd_pointer_info(ptr, info, alloc, num_agents_accessible, + accessible); +} + void InitializeAmdgpuInterceptors() { ASAN_INTERCEPT_FUNC(hsa_memory_copy); ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_allocate); @@ -962,6 +971,7 @@ 
void InitializeAmdgpuInterceptors() { ASAN_INTERCEPT_FUNC(hsa_amd_ipc_memory_detach); ASAN_INTERCEPT_FUNC(hsa_amd_vmem_address_reserve_align); ASAN_INTERCEPT_FUNC(hsa_amd_vmem_address_free); + ASAN_INTERCEPT_FUNC(hsa_amd_pointer_info); } void ENSURE_HSA_INITED() { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 96a715a2cb0f0..0289d19a44c49 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -914,12 +914,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { // If DWARF address space value is other than None, add it. The IR // verifier checks that DWARF address space only exists for pointer // or reference types. - if (auto AS = DTy->getDWARFAddressSpace()) { - // TODO: Drop address_class once the debugger adopts address_space - for (auto ASTag : - {dwarf::DW_AT_address_class, dwarf::DW_AT_LLVM_address_space}) - addUInt(Buffer, ASTag, dwarf::DW_FORM_data4, *AS); - } + if (auto AS = DTy->getDWARFAddressSpace()) + addUInt(Buffer, dwarf::DW_AT_LLVM_address_space, dwarf::DW_FORM_data4, *AS); // Add template alias template parameters. 
if (Tag == dwarf::DW_TAG_template_alias) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 0bf460ab53a0c..e7241e460ccaa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -39,10 +39,9 @@ enum ImplicitArgumentPositions { #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS, enum ImplicitArgumentMask { - UNKNOWN_INTRINSIC = 0, + NOT_IMPLICIT_INPUT = 0, #include "AMDGPUAttributes.def" - ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1, - NOT_IMPLICIT_INPUT + ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1 }; #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str}, @@ -117,7 +116,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5); return QUEUE_PTR; default: - return UNKNOWN_INTRINSIC; + return NOT_IMPLICIT_INPUT; } } @@ -536,21 +535,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { ImplicitArgumentMask AttrMask = intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit, HasApertureRegs, SupportsGetDoorbellID, COV); - - if (AttrMask == UNKNOWN_INTRINSIC) { - // Assume not-nocallback intrinsics may invoke a function which accesses - // implicit arguments. - // - // FIXME: This isn't really the correct check. We want to ensure it - // isn't calling any function that may use implicit arguments regardless - // of whether it's internal to the module or not. - // - // TODO: Ignoring callsite attributes. - if (!Callee->hasFnAttribute(Attribute::NoCallback)) - return indicatePessimisticFixpoint(); - continue; - } - if (AttrMask != NOT_IMPLICIT_INPUT) { if ((IsNonEntryFunc || !NonKernelOnly)) removeAssumedBits(AttrMask); @@ -1374,10 +1358,7 @@ struct AAAMDGPUMinAGPRAlloc default: // Some intrinsics may use AGPRs, but if we have a choice, we are not // required to use AGPRs. - - // Assume !nocallback intrinsics may call a function which requires - // AGPRs. 
- return CB.hasFnAttr(Attribute::NoCallback); + return true; } // TODO: Handle callsite attributes diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 8f17f9c2760ef..5bf9b3a822f36 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -649,7 +649,8 @@ class AMDGPULowerModuleLDS { ModuleScopeVariables.insert(GV); } else if (K.second.size() == 1) { KernelAccessVariables.insert(GV); - } else if (K.second == HybridModuleRootKernels) { + } else if (K.second == HybridModuleRootKernels && + set_is_subset(K.second, HybridModuleRootKernels)) { ModuleScopeVariables.insert(GV); } else { TableLookupVariables.insert(GV); diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index b7a92a0a1d634..0d206aba33543 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -523,6 +523,7 @@ enum Id { // HwRegCode, (6) [5:0] ID_HW_ID1 = 23, ID_HW_ID2 = 24, ID_POPS_PACKER = 25, + ID_SCHED_MODE = 26, ID_PERF_SNAPSHOT_DATA_gfx11 = 27, ID_IB_STS2 = 28, ID_SHADER_CYCLES = 29, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index 6489e63d4f6b8..ce782b025464e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -211,6 +211,7 @@ static constexpr CustomOperand Operands[] = { {{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus}, {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940}, {{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10}, + {{"HW_REG_WAVE_SCHED_MODE"}, ID_SCHED_MODE, isGFX12Plus}, {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11}, {{"HW_REG_IB_STS2"}, ID_IB_STS2, isGFX1250}, {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11}, diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp 
index 491685f9a032b..65abd97c6d642 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -83,6 +83,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include @@ -5280,6 +5281,34 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // FIXME: We might want to defer PHI speculation until after here. // FIXME: return nullptr; } else { + // AMDGPU: If the target is AMDGPU and the chosen SliceTy is a HIP vector + // struct of 2 or 4 identical elements, canonicalize it to an IR vector. + // This helps SROA treat it as a single value and unlock vector ld/st. + // We pattern-match struct names starting with "struct.HIP_vector". + if (Function *F = AI.getFunction()) { + Triple TT(F->getParent()->getTargetTriple()); + if (TT.isAMDGPU()) { + if (auto *STy = dyn_cast<StructType>(SliceTy)) { + StringRef Name = STy->hasName() ? STy->getName() : StringRef(); + if (Name.starts_with("struct.HIP_vector")) { + unsigned NumElts = STy->getNumElements(); + if ((NumElts == 2 || NumElts == 4) && NumElts > 0) { + Type *EltTy = STy->getElementType(0); + bool AllSame = true; + for (unsigned I = 1; I < NumElts; ++I) + if (STy->getElementType(I) != EltTy) { + AllSame = false; + break; + } + if (AllSame && VectorType::isValidElementType(EltTy)) { + SliceTy = FixedVectorType::get(EltTy, NumElts); + } + } + } + } + } + } + // Make sure the alignment is compatible with P.beginOffset().
const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset()); // If we will get at least this much alignment from the type alone, leave diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index ac5bfc5a4f27a..ccf599140fd5d 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -585,7 +585,8 @@ if(build_runtimes) INSTALL_COMMAND "" CMAKE_ARGS -DCMAKE_PREFIX_PATH=${CMAKE_BINARY_DIR}/lib/cmake -DROCM_DEVICE_LIBS_BITCODE_INSTALL_LOC_NEW=${ROCM_DEVICE_LIBS_BITCODE_INSTALL_LOC} - -DROCM_DEVICE_LIBS_BITCODE_INSTALL_LOC_OLD=amdgcn) + -DROCM_DEVICE_LIBS_BITCODE_INSTALL_LOC_OLD=amdgcn + ${extra_cmake_args}) endif() endif() diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll deleted file mode 100644 index d7d623ac89146..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll +++ /dev/null @@ -1,31 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s - -; Make sure we do not infer anything about implicit inputs through an -; intrinsic call which is not nocallback. - -declare zeroext i32 @return_i32() - -define i32 @test_i32_return() gc "statepoint-example" { -; CHECK-LABEL: define i32 @test_i32_return( -; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0) -; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]]) -; CHECK-NEXT: ret i32 [[CALL1]] -; -entry: - %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0) - %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token) - ret i32 %call1 -} - -declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...) -declare i32 @llvm.experimental.gc.result.i32(token) #0 - -attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } -;. -; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll deleted file mode 100644 index 71c509afa8e64..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll +++ /dev/null @@ -1,74 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s - -; Make sure we infer no inputs are used through some intrinsics - -define void @use_fake_use(i32 %arg) { -; CHECK-LABEL: define void @use_fake_use( -; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]]) -; CHECK-NEXT: ret void -; - call void (...) 
@llvm.fake.use(i32 %arg) - ret void -} - -define void @use_donothing() { -; CHECK-LABEL: define void @use_donothing( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: ret void -; - call void @llvm.donothing() - ret void -} - -define void @use_assume(i1 %arg) { -; CHECK-LABEL: define void @use_assume( -; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]]) -; CHECK-NEXT: ret void -; - call void @llvm.assume(i1 %arg) - ret void -} - -define void @use_trap() { -; CHECK-LABEL: define void @use_trap( -; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: call void @llvm.trap() -; CHECK-NEXT: ret void -; - call void @llvm.trap() - ret void -} - -define void @use_debugtrap() { -; CHECK-LABEL: define void @use_debugtrap( -; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT: call void @llvm.debugtrap() -; CHECK-NEXT: ret void -; - call void @llvm.debugtrap() - ret void -} - -define void @use_ubsantrap() { -; CHECK-LABEL: define void @use_ubsantrap( -; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT: call void @llvm.ubsantrap(i8 0) -; CHECK-NEXT: ret void -; - call void @llvm.ubsantrap(i8 0) - ret void -} - -;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" } -;. 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll index bd29e9e5855ff..8fec92ca8cfd9 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll @@ -138,4 +138,3 @@ define amdgpu_kernel void @kern_block_direct_allocation() { ; CHECK: attributes #[[ATTR1]] = { "amdgpu-lds-size"="16" } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -;. diff --git a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-diop-diexpression-address-spaces.ll b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-diop-diexpression-address-spaces.ll index d9d143d4823b9..05d3583197f77 100644 --- a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-diop-diexpression-address-spaces.ll +++ b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-diop-diexpression-address-spaces.ll @@ -123,7 +123,6 @@ attributes #0 = { "frame-pointer"="all" } ; CHECK: [[PTR_AS_3]]: DW_TAG_pointer_type ; CHECK-NEXT: DW_AT_type -; CHECK-NEXT: DW_AT_address_class (0x00000003) ; CHECK-NEXT: DW_AT_LLVM_address_space (0x00000003 "DW_ASPACE_LLVM_AMDGPU_local") ; CHECK: [[PTR_AS_NONE]]: DW_TAG_pointer_type @@ -132,7 +131,6 @@ attributes #0 = { "frame-pointer"="all" } ; CHECK: [[PTR_AS_5]]: DW_TAG_pointer_type ; CHECK-NEXT: DW_AT_type -; CHECK-NEXT: DW_AT_address_class (0x00000005) ; CHECK-NEXT: DW_AT_LLVM_address_space (0x00000005 "DW_ASPACE_LLVM_AMDGPU_private_lane") !llvm.dbg.cu = !{!0} diff --git a/llvm/test/DebugInfo/AMDGPU/pointer-address-space.ll b/llvm/test/DebugInfo/AMDGPU/pointer-address-space.ll index 60df8365e321e..3e8e80e442e5b 100644 --- a/llvm/test/DebugInfo/AMDGPU/pointer-address-space.ll +++ 
b/llvm/test/DebugInfo/AMDGPU/pointer-address-space.ll @@ -50,13 +50,11 @@ ; CHECK: 0x[[LOCAL]]: DW_TAG_pointer_type ; CHECK-NEXT: DW_AT_type -; CHECK-NEXT: DW_AT_address_class [DW_FORM_data4] (0x00000002) ; CHECK-NEXT: DW_AT_LLVM_address_space [DW_FORM_data4] (0x00000002 "DW_ASPACE_LLVM_AMDGPU_region") ; CHECK-NEXT: DW_AT_LLVM_memory_space [DW_FORM_data4] (DW_MSPACE_LLVM_group) ; CHECK: 0x[[PRIVATE]]: DW_TAG_pointer_type ; CHECK-NEXT: DW_AT_type -; CHECK-NEXT: DW_AT_address_class [DW_FORM_data4] (0x00000001) ; CHECK-NEXT: DW_AT_LLVM_address_space [DW_FORM_data4] (0x00000001 "DW_ASPACE_LLVM_AMDGPU_generic") ; CHECK-NEXT: DW_AT_LLVM_memory_space [DW_FORM_data4] (DW_MSPACE_LLVM_private) diff --git a/llvm/test/DebugInfo/Generic/address_space_rvalue.ll b/llvm/test/DebugInfo/Generic/address_space_rvalue.ll index 38798c11b5667..b16ac7e6ce987 100644 --- a/llvm/test/DebugInfo/Generic/address_space_rvalue.ll +++ b/llvm/test/DebugInfo/Generic/address_space_rvalue.ll @@ -6,7 +6,8 @@ ; CHECK: DW_TAG_rvalue_reference_type ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_address_class (0x00000001) +; CHECK-NOT: DW_AT_address_class +; CHECK: DW_AT_LLVM_address_space (0x00000001) @y = global ptr null, align 8, !dbg !0 diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s index 819ecb866c5ae..ba5159482df50 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s @@ -258,3 +258,12 @@ s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO) s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI) // GFX12: encoding: [0x1e,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_WAVE_SCHED_MODE) +// GFX12: encoding: [0x1a,0xf8,0x80,0xb8] + +s_setreg_b32 hwreg(HW_REG_WAVE_SCHED_MODE, 0, 2), s2 +// GFX12: encoding: [0x1a,0x08,0x02,0xb9] + +s_setreg_imm32_b32 hwreg(HW_REG_WAVE_SCHED_MODE), 0x2 +// GFX12: encoding: [0x1a,0xf8,0x80,0xb9,0x02,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt index 41c5724a596f9..63ad07acee36f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt @@ -276,3 +276,12 @@ # GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI) ; encoding: [0x1e,0xf8,0x80,0xb8] 0x1e,0xf8,0x80,0xb8 + +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_SCHED_MODE) ; encoding: [0x1a,0xf8,0x80,0xb8] +0x1a,0xf8,0x80,0xb8 + +# GFX12: s_setreg_b32 hwreg(HW_REG_WAVE_SCHED_MODE, 0, 2), s2 ; encoding: [0x1a,0x08,0x02,0xb9] +0x1a,0x08,0x02,0xb9 + +# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_SCHED_MODE), 2 ; encoding: [0x1a,0xf8,0x80,0xb9,0x02,0x00,0x00,0x00] +0x1a,0xf8,0x80,0xb9,0x02,0x00,0x00,0x00 diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt new file mode 100644 index 0000000000000..f6b0d87ba563d --- /dev/null +++ b/offload/DeviceRTL/CMakeLists.txt @@ -0,0 +1,232 @@ +set(LIBOMPTARGET_BUILD_DEVICERTL_BCLIB TRUE CACHE BOOL + "Can be set to false to disable building this library.") + +if (NOT LIBOMPTARGET_BUILD_DEVICERTL_BCLIB) + message(STATUS "Not building DeviceRTL: Disabled by LIBOMPTARGET_BUILD_DEVICERTL_BCLIB") + return() +endif() + +# Check to ensure the host system is a supported host architecture. +if(NOT ${CMAKE_SIZEOF_VOID_P} EQUAL "8") + message(STATUS "Not building DeviceRTL: Runtime does not support 32-bit hosts") + return() +endif() + +if (LLVM_DIR) + # Builds that use pre-installed LLVM have LLVM_DIR set. + # A standalone or LLVM_ENABLE_RUNTIMES=openmp build takes this route + find_program(CLANG_TOOL clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) +elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDALONE_BUILD) + # LLVM in-tree builds may use CMake target names to discover the tools. + # A LLVM_ENABLE_PROJECTS=openmp build takes this route + set(CLANG_TOOL $<TARGET_FILE:clang>) +else() + message(STATUS "Not building DeviceRTL.
No appropriate clang found") + return() +endif() + +set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR}) +set(include_directory ${devicertl_base_directory}/include) +set(source_directory ${devicertl_base_directory}/src) + +if(OFFLOAD_ENABLE_EMISSARY_APIS) + set(emissary_includes ${include_directory}/EmissaryIds.h) + set(emissary_sources src/EmissaryFortrt.cpp src/EmissaryPrint.cpp) +endif() + +set(include_files + ${include_directory}/Allocator.h + ${include_directory}/Configuration.h + ${include_directory}/Platform.h + ${include_directory}/Debug.h + ${include_directory}/Interface.h + ${include_directory}/LibC.h + ${include_directory}/Mapping.h + ${include_directory}/Profiling.h + ${include_directory}/State.h + ${include_directory}/Synchronization.h + ${include_directory}/DeviceTypes.h + ${include_directory}/DeviceUtils.h + ${include_directory}/Xteamr.h + ${include_directory}/Xteams.h + ${include_directory}/Workshare.h + ${emissary_includes} +) + +set(src_files + ${source_directory}/Allocator.cpp + ${source_directory}/Configuration.cpp + ${source_directory}/Debug.cpp + ${source_directory}/Kernel.cpp + ${source_directory}/LibC.cpp + ${source_directory}/LibM.cpp + ${source_directory}/Mapping.cpp + ${source_directory}/Misc.cpp + ${source_directory}/Parallelism.cpp + ${source_directory}/Profiling.cpp + ${source_directory}/Reduction.cpp + ${source_directory}/State.cpp + ${source_directory}/Synchronization.cpp + ${source_directory}/Tasking.cpp + ${source_directory}/DeviceUtils.cpp + ${source_directory}/Workshare.cpp + ${source_directory}/ExtraMapping.cpp + ${source_directory}/Xteamr.cpp + ${source_directory}/Memory.cpp + ${source_directory}/Xteams.cpp + ${emissary_sources} +) + +# We disable the slp vectorizer during the runtime optimization to avoid +# vectorized accesses to the shared state. Generally, those are "good" but +# the optimizer pipeline (esp. 
Attributor) does not fully support vectorized +# instructions yet and we end up missing out on way more important constant +# propagation. That said, we will run the vectorizer again after the runtime +# has been linked into the user program. +set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512 -mllvm -vectorize-slp=false ) + +# If the user built with the GPU C library enabled we will use that instead. +if(${LIBOMPTARGET_GPU_LIBC_SUPPORT}) + list(APPEND clang_opt_flags -DOMPTARGET_HAS_LIBC) +endif() + +# Set flags for LLVM Bitcode compilation. +set(bc_flags -c -flto -std=c++17 -fvisibility=hidden + ${clang_opt_flags} -nogpulib -nostdlibinc + -fno-rtti -fno-exceptions -fconvergent-functions + -Wno-unknown-cuda-version + -DOMPTARGET_DEVICE_RUNTIME + -I${include_directory} + -I${devicertl_base_directory}/../include + -I${devicertl_base_directory}/../../libc +) + +if(${LIBOMPTARGET_DEVICE_DEBUG}) + list(APPEND bc_flags -DOMPTARGET_DEBUG=-1) +else() + list(APPEND bc_flags -DOMPTARGET_DEBUG=0) +endif() + +# first create an object target +add_library(omptarget.devicertl.all_objs OBJECT IMPORTED) +function(compileDeviceRTLLibrary target_name target_triple) + set(target_bc_flags ${ARGN}) + + if(${target_name} MATCHES "amdgpu") + find_package(AMDDeviceLibs REQUIRED CONFIG + HINTS ${CMAKE_BINARY_DIR}/../../tools/rocm-device-libs + ${CMAKE_BINARY_DIR}/../rocm-device-libs-prefix/src/rocm-device-libs-build + ${CMAKE_INSTALL_PREFIX} + ) + get_target_property(_ocml_bc ocml IMPORTED_LOCATION) + get_target_property(_ockl_bc ockl IMPORTED_LOCATION) + if(NOT _ockl_bc) + message(FATAL_ERROR "Could not find ockl.bc") + endif() + if(NOT _ocml_bc) + message(FATAL_ERROR "Could not find ocml.bc") + endif() + list(APPEND target_bc_flags -Xclang -mlink-builtin-bitcode -Xclang ${_ockl_bc}) + list(APPEND target_bc_flags -Xclang -mlink-builtin-bitcode -Xclang ${_ocml_bc}) + endif() + + foreach(src ${src_files}) + get_filename_component(infile ${src} ABSOLUTE) + 
get_filename_component(outfile ${src} NAME) + set(outfile "${outfile}-${target_name}.o") + set(depfile "${outfile}.d") + + # Passing an empty CPU to -march= suppressed target specific metadata. + add_custom_command(OUTPUT ${outfile} + COMMAND ${CLANG_TOOL} + ${bc_flags} + --target=${target_triple} + ${target_bc_flags} + -MD -MF ${depfile} + ${infile} -o ${outfile} + DEPENDS ${infile} + DEPFILE ${depfile} + COMMENT "Building LLVM bitcode ${outfile}" + VERBATIM + ) + if(TARGET clang) + # Add a file-level dependency to ensure that clang is up-to-date. + # By default, add_custom_command only builds clang if the + # executable is missing. + add_custom_command(OUTPUT ${outfile} + DEPENDS clang + APPEND + ) + endif() + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}) + + list(APPEND obj_files ${CMAKE_CURRENT_BINARY_DIR}/${outfile}) + endforeach() + # Trick to combine these into a bitcode file via the linker's LTO pass. This + # is used to provide the legacy `libomptarget-.bc` files. Hack this + # through as an executable to get it to use the relocatable link. 
+ add_executable(libomptarget-${target_name}) + target_sources(libomptarget-${target_name} PRIVATE ${obj_files}) + set_target_properties(libomptarget-${target_name} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${LIBOMPTARGET_LLVM_LIBRARY_INTDIR} + LINKER_LANGUAGE CXX + BUILD_RPATH "" + INSTALL_RPATH "" + RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc) + target_compile_options(libomptarget-${target_name} PRIVATE + "--target=${target_triple}" "-fuse-ld=lld" "-march=" "-mcpu=" + "-Wno-unused-command-line-argument") + target_link_options(libomptarget-${target_name} PRIVATE + "--target=${target_triple}" "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm" + "-Wl,--lto-newpm-passes=default" "-Wl,-plugin-opt=-openmp-opt-disable" + "-Wl,-plugin-opt=-attributor-enable=module" + "-Wl,-plugin-opt=-vectorize-slp=false" "-fuse-ld=lld" "-march=" "-mcpu=") + install(TARGETS libomptarget-${target_name} + PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ + DESTINATION "lib${LLVM_LIBDIR_SUFFIX}/${target_triple}") + + add_library(omptarget.${target_name}.all_objs OBJECT IMPORTED) + set_property(TARGET omptarget.${target_name}.all_objs APPEND PROPERTY IMPORTED_OBJECTS + ${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}/libomptarget-${target_name}.bc) + add_dependencies(omptarget.${target_name}.all_objs libomptarget-${target_name}) + + # Archive all the object files generated above into a static library + add_library(omptarget.${target_name} STATIC) + set_target_properties(omptarget.${target_name} PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}/${target_triple}" + ARCHIVE_OUTPUT_NAME ompdevice + LINKER_LANGUAGE CXX + ) + add_dependencies(omptarget.${target_name} libomptarget-${target_name}) + target_link_libraries(omptarget.${target_name} PRIVATE omptarget.${target_name}.all_objs) + target_link_options(omptarget.${target_name} PRIVATE "--target=${target_triple}" + "-Wno-unused-command-line-argument" "-r" "-nostdlib" "-flto" + "-Wl,--lto-emit-llvm" "-fuse-ld=lld" "-march=" 
"-mcpu=") + + install(TARGETS omptarget.${target_name} + ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}/${target_triple}") + + if (CMAKE_EXPORT_COMPILE_COMMANDS) + set(ide_target_name omptarget-ide-${target_name}) + add_library(${ide_target_name} STATIC EXCLUDE_FROM_ALL ${src_files}) + target_compile_options(${ide_target_name} PRIVATE + -fvisibility=hidden --target=${target_triple} + -nogpulib -nostdlibinc -Wno-unknown-cuda-version + ) + target_compile_definitions(${ide_target_name} PRIVATE SHARED_SCRATCHPAD_SIZE=512) + target_include_directories(${ide_target_name} PRIVATE + ${include_directory} + ${devicertl_base_directory}/../../libc + ${devicertl_base_directory}/../include + ) + install(TARGETS ${ide_target_name} EXCLUDE_FROM_ALL) + endif() +endfunction() + +if(NOT LLVM_TARGETS_TO_BUILD OR "AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) + compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none) +endif() + +if(NOT LLVM_TARGETS_TO_BUILD OR "NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63) +endif() diff --git a/offload/liboffload/CMakeLists.txt b/offload/liboffload/CMakeLists.txt index efb800f2495f9..613c6373d0e4d 100644 --- a/offload/liboffload/CMakeLists.txt +++ b/offload/liboffload/CMakeLists.txt @@ -39,10 +39,18 @@ target_compile_definitions(LLVMOffload PRIVATE DEBUG_PREFIX="Liboffload" ) -set_target_properties(LLVMOffload PROPERTIES - POSITION_INDEPENDENT_CODE ON - INSTALL_RPATH "$ORIGIN" - BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..") +# Don't override an externally defined RPATH +if(NOT DEFINED CMAKE_INSTALL_RPATH) + set_target_properties(LLVMOffload PROPERTIES + POSITION_INDEPENDENT_CODE ON + INSTALL_RPATH "$ORIGIN:$ORIGIN/../lib:$ORIGIN/../../lib" + BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..") +else() + set_target_properties(LLVMOffload PROPERTIES + POSITION_INDEPENDENT_CODE ON + INSTALL_RPATH ${CMAKE_INSTALL_RPATH} + BUILD_RPATH 
"$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..") +endif() install(TARGETS LLVMOffload LIBRARY COMPONENT LLVMOffload DESTINATION "${OFFLOAD_INSTALL_LIBDIR}") install(FILES ${CMAKE_CURRENT_BINARY_DIR}/API/OffloadAPI.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include/offload) diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 0388bbba4ee28..6bf8aac70fd4c 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -5135,7 +5135,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { .OMPX_XTeamReductionOccupancyBasedOpt = false, .OMPX_AdjustNumTeamsForXteamRedSmallBlockSize=1}}, // Default config for unknown devices. - {"DEFAULT", {.OMPX_UseMultipleSdmaEngines = true, + {"DEFAULT", {.OMPX_UseMultipleSdmaEngines = false, .OMPX_XteamBlockSize = 512, .OMPX_XTeamReductionOccupancyBasedOpt = false, .OMPX_AdjustNumTeamsForXteamRedSmallBlockSize=1}}};