From 5fe4ce253952ae0db08e3b2ad4ae893c51735684 Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Tue, 21 Oct 2025 16:11:49 -0400 Subject: [PATCH 01/13] Add rocWMMA to cmake files --- CMakeLists.txt | 11 ++++ README.md | 1 + docs/development/windows_support.md | 2 +- examples/CMakeLists.txt | 1 + examples/clean_configure_test_project.cmake | 1 + examples/cpp-sdk-user/CMakeLists.txt | 6 ++ math-libs/CMakeLists.txt | 63 +++++++++++++++++++++ 7 files changed, 84 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 963ad7c0173..d671b962fcc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -318,6 +318,17 @@ if(NOT WIN32) cmake_dependent_option(THEROCK_MIOPEN_USE_COMPOSABLE_KERNEL "Enables composable kernel in MIOpen" ON ${THEROCK_ENABLE_COMPOSABLE_KERNEL} OFF) endif() +if(ROCWMMA_VALIDATE_WITH_ROCBLAS OR ROCWMMA_BENCHMARK_WITH_ROCBLAS) + set(_rocwmma_blas_requirements "BLAS") +else() + set(_rocwmma_blas_requirements "") +endif() +therock_add_feature(ROCWMMA + GROUP MATH_LIBS + DESCRIPTION "Enables rocWMMA" + REQUIRES COMPILER HIP_RUNTIME ${_rocwmma_blas_requirements} +) + # ML-Libs Features. therock_add_feature(MIOPEN GROUP ML_LIBS diff --git a/README.md b/README.md index 11066546514..1742cccf0f9 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,7 @@ minimal build): | `-DTHEROCK_ENABLE_MIOPEN=ON` | Enables MIOpen | | `-DTHEROCK_ENABLE_MIOPEN_PLUGIN=ON` | Enables MIOpen_plugin | | `-DTHEROCK_ENABLE_HIPDNN=ON` | Enables hipDNN | +| `-DTHEROCK_ENABLE_ROCWMMA=ON` | Enables rocWMMA | > [!TIP] > Enabling any features will implicitly enable their *minimum* dependencies. Some diff --git a/docs/development/windows_support.md b/docs/development/windows_support.md index a97e69cd6c5..017ff16e5e7 100644 --- a/docs/development/windows_support.md +++ b/docs/development/windows_support.md @@ -64,12 +64,12 @@ mainline, in open source, using MSVC, etc.). 
| math-libs (BLAS) | [rocSOLVER](https://github.com/ROCm/rocSOLVER) | ✅ | | | math-libs (BLAS) | [hipSOLVER](https://github.com/ROCm/hipSOLVER) | ✅ | | | math-libs (BLAS) | [hipBLAS](https://github.com/ROCm/hipBLAS) | ✅ | | +| math-libs | [rocWMMA](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocwmma) | ✅ | | | | | | | | ml-libs | [Composable Kernel](https://github.com/ROCm/composable_kernel) | ❌ | Unsupported | | ml-libs | [MIOpen](https://github.com/ROCm/MIOpen) | ✅ | | | ml-libs | [hipDNN](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipdnn) | ❌ | Unsupported | | ml-libs | [MIOpen Legacy Plugin](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipdnn/plugins/miopen_legacy_plugin) | ❌ | Unsupported (requires hipDNN) | - ## Building TheRock from source These instructions mostly mirror the instructions in the root diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index e4959d763cf..793e3e40832 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -36,6 +36,7 @@ add_test( "-DTHEROCK_ENABLE_PRIM=${THEROCK_ENABLE_PRIM}" "-DTHEROCK_ENABLE_RAND=${THEROCK_ENABLE_RAND}" "-DTHEROCK_ENABLE_RCCL=${THEROCK_ENABLE_RCCL}" + "-DTHEROCK_ENABLE_ROCWMMA=${THEROCK_ENABLE_ROCWMMA}" "-DTHEROCK_ENABLE_SOLVER=${THEROCK_ENABLE_SOLVER}" "-DTHEROCK_ENABLE_SPARSE=${THEROCK_ENABLE_SPARSE}" ${HIP_CMAKE_ARGS} diff --git a/examples/clean_configure_test_project.cmake b/examples/clean_configure_test_project.cmake index 1ac3c856382..c116ca16a0b 100644 --- a/examples/clean_configure_test_project.cmake +++ b/examples/clean_configure_test_project.cmake @@ -13,6 +13,7 @@ set(propagate_vars THEROCK_ENABLE_PRIM THEROCK_ENABLE_RAND THEROCK_ENABLE_RCCL + THEROCK_ENABLE_ROCWMMA THEROCK_ENABLE_SOLVER THEROCK_ENABLE_SPARSE CMAKE_HIP_PLATFORM diff --git a/examples/cpp-sdk-user/CMakeLists.txt b/examples/cpp-sdk-user/CMakeLists.txt index 82dc38a7655..7c43932549a 100644 --- a/examples/cpp-sdk-user/CMakeLists.txt +++ 
b/examples/cpp-sdk-user/CMakeLists.txt @@ -24,6 +24,7 @@ option(THEROCK_ENABLE_MIOPEN "Whether MIOpen is available" ON) option(THEROCK_ENABLE_PRIM "Whether rocprim is available" ON) option(THEROCK_ENABLE_RAND "Whether rocrand/hiprand are available" ON) option(THEROCK_ENABLE_RCCL "Whether rccl is available" ON) +option(THEROCK_ENABLE_ROCWMMA "Whether rocwmma is available" ON) option(THEROCK_ENABLE_SOLVER "Whether rocsolver/hipsolver are available" ON) option(THEROCK_ENABLE_SPARSE "Whether rocsparse/hipsparse/hipsparselt are available" ON) option(ENABLE_DEVICE_TEST "Whether to enable testing that requires a device" OFF) @@ -144,3 +145,8 @@ if(THEROCK_ENABLE_HIPDNN) find_package(hipdnn_backend CONFIG REQUIRED) message(STATUS "hipdnn version: ${hipdnn_VERSION}") endif() + +if(THEROCK_ENABLE_ROCWMMA) + find_package(rocWMMA CONFIG REQUIRED) + message(STATUS "rocWMMA version: ${rocWMMA_VERSION}") +endif() diff --git a/math-libs/CMakeLists.txt b/math-libs/CMakeLists.txt index 23eb42139a0..797310ba7ba 100644 --- a/math-libs/CMakeLists.txt +++ b/math-libs/CMakeLists.txt @@ -270,3 +270,66 @@ if(THEROCK_ENABLE_BLAS) add_subdirectory(support) add_subdirectory(BLAS) endif() + +if(THEROCK_ENABLE_ROCWMMA) + ############################################################################## + # rocWMMA + ############################################################################## + + if(THEROCK_ENABLE_BLAS) + set(ROCWMMA_BLAS_OPTIONAL_DEP rocBLAS) + endif() + + if(THEROCK_ROCWMMA_ENABLE_BENCHMARKS) + set(ROCWMMA_ROCM_SMI_OPTIONAL_DEP rocm_smi_lib) + else() + set(THEROCK_ROCWMMA_ENABLE_BENCHMARKS OFF) + endif() + + therock_cmake_subproject_declare(rocWMMA + EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_LIBRARIES_SOURCE_DIR}/projects/rocwmma" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/rocWMMA" + BACKGROUND_BUILD + CMAKE_ARGS + -DHIP_PLATFORM=amd + -DROCM_PATH= + -DROCM_DIR= + "-DROCWMMA_BUILD_TESTS=$" + "-DROCWMMA_VALIDATE_WITH_ROCBLAS=$" + "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=$,$>" + 
"-DROCWMMA_BUILD_BENCHMARK_TESTS=${THEROCK_ROCWMMA_ENABLE_BENCHMARKS}" + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON # Needed for Ninja build? + -DROCWMMA_USE_SYSTEM_GOOGLETEST=ON # Use therock-googletest + CMAKE_INCLUDES + therock_explicit_finders.cmake + COMPILER_TOOLCHAIN + amd-hip + BUILD_DEPS + rocm-cmake + therock-googletest + ${ROCWMMA_BLAS_OPTIONAL_DEP} + ${ROCWMMA_ROCM_SMI_OPTIONAL_DEP} + RUNTIME_DEPS + hip-clr + ${ROCWMMA_BLAS_OPTIONAL_DEP} + ${ROCWMMA_ROCM_SMI_OPTIONAL_DEP} + ) + therock_cmake_subproject_glob_c_sources(rocWMMA + SUBDIRS + . + ) + therock_cmake_subproject_activate(rocWMMA) + + therock_provide_artifact(rocWMMA + DESCRIPTOR artifact-rocwmma.toml + COMPONENTS + dbg + dev + doc + lib + run + test + SUBPROJECT_DEPS + rocWMMA + ) +endif(THEROCK_ENABLE_ROCWMMA) From 09a36d8b935be2cf915ad3e5ac8d2c0f80828b0e Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Tue, 21 Oct 2025 23:07:57 -0400 Subject: [PATCH 02/13] Add artifact-rocwmma.toml and update artifact handling --- .../fetch_test_configurations.py | 9 +++++++ build_tools/install_rocm_from_artifacts.py | 23 ++++++++++++++++-- docs/development/installing_artifacts.md | 1 + math-libs/CMakeLists.txt | 4 +--- math-libs/artifact-rocwmma.toml | 24 +++++++++++++++++++ 5 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 math-libs/artifact-rocwmma.toml diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py index d0470f79b17..db2e2fdee63 100644 --- a/build_tools/github_actions/fetch_test_configurations.py +++ b/build_tools/github_actions/fetch_test_configurations.py @@ -204,6 +204,15 @@ def _get_script_path(script_name: str) -> str: "platform": ["linux"], "total_shards": 1, }, + # rocWMMA tests + "rocwmma": { + "job_name": "rocwmma", + "fetch_artifact_args": "--rocwmma --tests", # Fixme: may need to add --blas + "timeout_minutes": 10, + "test_script": f"python {_get_script_path('test_rocwmma.py')}", + "platform": ["linux"], 
+ "total_shards": 1, + }, } diff --git a/build_tools/install_rocm_from_artifacts.py b/build_tools/install_rocm_from_artifacts.py index 2497a794793..133bb976610 100644 --- a/build_tools/install_rocm_from_artifacts.py +++ b/build_tools/install_rocm_from_artifacts.py @@ -14,8 +14,17 @@ (--artifact-group ARTIFACT_GROUP | --amdgpu_family AMDGPU_FAMILY) [--output-dir OUTPUT_DIR] (--run-id RUN_ID | --release RELEASE | --input-dir INPUT_DIR) - [--blas | --no-blas] [--fft | --no-fft] [--hipdnn | --no-hipdnn] [--miopen | --no-miopen] [--miopen-plugin | --no-miopen-plugin] - [--prim | --no-prim] [--rand | --no-rand] [--rccl | --no-rccl] [--tests | --no-tests] [--base-only] + [--blas | --no-blas] + [--fft | --no-fft] + [--hipdnn | --no-hipdnn] + [--miopen | --no-miopen] + [--miopen-plugin | --no-miopen-plugin] + [--prim | --no-prim] + [--rand | --no-rand] + [--rccl | --no-rccl] + [--rocwmma | --no-rocwmma] + [--tests | --no-tests] + [--base-only] Examples: - Downloads and unpacks the gfx94X S3 artifacts from GitHub CI workflow run 14474448215 @@ -170,6 +179,7 @@ def retrieve_artifacts_by_run_id(args): args.prim, args.rand, args.rccl, + args.rocwmma, ] ): argv.extend(base_artifact_patterns) @@ -193,6 +203,8 @@ def retrieve_artifacts_by_run_id(args): extra_artifacts.append("rand") if args.rccl: extra_artifacts.append("rccl") + if args.rocwmma: + extra_artifacts.append("rocwmma") extra_artifact_patterns = [f"{a}_lib" for a in extra_artifacts] if args.tests: @@ -374,6 +386,13 @@ def main(argv): action=argparse.BooleanOptionalAction, ) + artifacts_group.add_argument( + "--rocwmma", + default=False, + help="Include 'rocwmma' artifacts", + action=argparse.BooleanOptionalAction, + ) + artifacts_group.add_argument( "--tests", default=False, diff --git a/docs/development/installing_artifacts.md b/docs/development/installing_artifacts.md index 90f6834a871..ef062bc79c7 100644 --- a/docs/development/installing_artifacts.md +++ b/docs/development/installing_artifacts.md @@ -19,6 +19,7 @@ 
The script supports the following command-line options: | `--prim` | Flag | Include primitives artifacts | | `--rand` | Flag | Include random number generator artifacts | | `--rccl` | Flag | Include RCCL artifacts | +| `--rocwmma` | Flag | Include rocWMMA artifacts | | `--release` | String | Release version from nightly or dev tarballs | | `--run-id` | String | GitHub CI workflow run ID to install from | | `--tests` | Flag | Include test artifacts for enabled components | diff --git a/math-libs/CMakeLists.txt b/math-libs/CMakeLists.txt index 797310ba7ba..5b0db1c07d5 100644 --- a/math-libs/CMakeLists.txt +++ b/math-libs/CMakeLists.txt @@ -320,14 +320,12 @@ if(THEROCK_ENABLE_ROCWMMA) ) therock_cmake_subproject_activate(rocWMMA) - therock_provide_artifact(rocWMMA + therock_provide_artifact(rocwmma DESCRIPTOR artifact-rocwmma.toml COMPONENTS dbg dev - doc lib - run test SUBPROJECT_DEPS rocWMMA diff --git a/math-libs/artifact-rocwmma.toml b/math-libs/artifact-rocwmma.toml new file mode 100644 index 00000000000..715aa4ae3fc --- /dev/null +++ b/math-libs/artifact-rocwmma.toml @@ -0,0 +1,24 @@ +# rocWMMA +[components.dbg."math-libs/rocWMMA/stage"] +optional = true +[components.dev."math-libs/rocWMMA/stage"] +include = [ + "include/rocwmma/*", +] +[components.lib."math-libs/rocWMMA/stage"] +optional = true +[components.test."math-libs/rocWMMA/stage"] +include = [ + # Samples + "bin/simple_*", + "bin/perf_*", + "bin/hipRTC_gemm", + "bin/hipRTC_gemm.exe", # Windows + # Tests + "bin/*_test", + "bin/*-validate", + "bin/*_test.exe", # Windows + "bin/*-validate.exe", # Windows + "bin/rocwmma/CTestTestfile.cmake", + "bin/rocwmma/*/CTestTestfile.cmake", +] From 875dd9474bbf4be4ad5b4ad5cfa0fd1872a95e64 Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Thu, 6 Nov 2025 13:36:27 -0500 Subject: [PATCH 03/13] Add rocWMMA testing --- .../fetch_test_configurations.py | 8 +-- .../test_executable_scripts/test_rocwmma.py | 58 +++++++++++++++++++ 2 files changed, 62 insertions(+), 4 
deletions(-) create mode 100644 build_tools/github_actions/test_executable_scripts/test_rocwmma.py diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py index db2e2fdee63..9a1936e04c8 100644 --- a/build_tools/github_actions/fetch_test_configurations.py +++ b/build_tools/github_actions/fetch_test_configurations.py @@ -207,11 +207,11 @@ def _get_script_path(script_name: str) -> str: # rocWMMA tests "rocwmma": { "job_name": "rocwmma", - "fetch_artifact_args": "--rocwmma --tests", # Fixme: may need to add --blas - "timeout_minutes": 10, + "fetch_artifact_args": "--rocwmma --tests --blas", + "timeout_minutes": 60, "test_script": f"python {_get_script_path('test_rocwmma.py')}", - "platform": ["linux"], - "total_shards": 1, + "platform": ["linux", "windows"], + "total_shards": 4, }, } diff --git a/build_tools/github_actions/test_executable_scripts/test_rocwmma.py b/build_tools/github_actions/test_executable_scripts/test_rocwmma.py new file mode 100644 index 00000000000..561ce60225b --- /dev/null +++ b/build_tools/github_actions/test_executable_scripts/test_rocwmma.py @@ -0,0 +1,58 @@ +import logging +import os +import shlex +import subprocess +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES") +platform = os.getenv("RUNNER_OS", "").lower() +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +# GTest sharding +SHARD_INDEX = os.getenv("SHARD_INDEX", 1) +TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1) +environ_vars = os.environ.copy() +# For display purposes in the GitHub Action UI, the shard array is 1-indexed. GTest expects a 0-based shard index, so we convert it here. +environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1) +environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS) + +logging.basicConfig(level=logging.INFO) + +# If smoke tests are enabled, we run smoke tests only. 
+# Otherwise, we run the normal test suite +test_type = os.getenv("TEST_TYPE", "full") + +# If there are devices for which the full set is too slow, we can +# programmatically set test_type to "regression" here. + +test_subdir = "" +timeout = "3600" +if test_type == "smoke": + # The emulator regression tests are very fast. + # If we need something even faster we can use "/smoke" here. + test_subdir = "/regression" + timeout = "300" +elif test_type == "regression": + test_subdir = "/regression" + timeout = "900" + +cmd = [ + "ctest", + "--test-dir", + f"{THEROCK_BIN_DIR}/rocwmma{test_subdir}", + "--output-on-failure", + "--parallel", + "8", + "--timeout", + timeout, +] +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") + +subprocess.run( + cmd, + cwd=THEROCK_DIR, + check=True, + env=environ_vars, +) From 498b2c36299d4605e2232ce63681132af198a079 Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Thu, 6 Nov 2025 13:37:12 -0500 Subject: [PATCH 04/13] Add rocWMMA patches --- ...changes-for-integration-into-TheRock.patch | 181 ++++++++++++++++++ .../0010-Fix-windows-build.patch | 41 ++++ 2 files changed, 222 insertions(+) create mode 100644 patches/amd-mainline/rocm-libraries/0009-CMake-changes-for-integration-into-TheRock.patch create mode 100644 patches/amd-mainline/rocm-libraries/0010-Fix-windows-build.patch diff --git a/patches/amd-mainline/rocm-libraries/0009-CMake-changes-for-integration-into-TheRock.patch b/patches/amd-mainline/rocm-libraries/0009-CMake-changes-for-integration-into-TheRock.patch new file mode 100644 index 00000000000..365f6f3c3f8 --- /dev/null +++ b/patches/amd-mainline/rocm-libraries/0009-CMake-changes-for-integration-into-TheRock.patch @@ -0,0 +1,181 @@ +From bcf6637ae55feb7d0340186fa985730f780dc2eb Mon Sep 17 00:00:00 2001 +From: Bain Syrowik +Date: Tue, 28 Oct 2025 13:02:46 -0400 +Subject: [PATCH 1/2] CMake changes for integration into TheRock + +--- + projects/rocwmma/CMakeLists.txt | 13 +++++ + projects/rocwmma/samples/CMakeLists.txt 
| 2 +- + projects/rocwmma/test/CMakeLists.txt | 74 +++++++++++++++++++------ + 3 files changed, 70 insertions(+), 19 deletions(-) + +diff --git a/projects/rocwmma/CMakeLists.txt b/projects/rocwmma/CMakeLists.txt +index 84cb179edb..3ba8652053 100644 +--- a/projects/rocwmma/CMakeLists.txt ++++ b/projects/rocwmma/CMakeLists.txt +@@ -27,6 +27,14 @@ + cmake_minimum_required( VERSION 3.14 ) + message(STATUS "CMake version: ${CMAKE_VERSION}") + ++if(NOT WIN32) ++ if(CMAKE_GENERATOR STREQUAL "Ninja") ++ message(STATUS "The CMake generator is Ninja.") ++ else() ++ message(WARNING "The CMake generator is not Ninja. Ninja is preferred over Make.") ++ endif() ++endif() ++ + # This has to be initialized before the project() command appears + # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE + if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE ) +@@ -190,6 +198,11 @@ rocm_package_add_deb_dependencies("libomp-dev") + rocm_package_add_rpm_dependencies("libomp-devel") + set(CPACK_RPM_PACKAGE_LICENSE "MIT") + ++rocm_export_targets( ++ TARGETS roc::rocwmma ++ NAMESPACE roc:: ++) ++ + rocm_create_package( + NAME rocwmma + DESCRIPTION "AMD GPU C++ library for GEMM primitives using MFMA and WMMA matrix instructions" +diff --git a/projects/rocwmma/samples/CMakeLists.txt b/projects/rocwmma/samples/CMakeLists.txt +index 4f86434355..982fbcd264 100644 +--- a/projects/rocwmma/samples/CMakeLists.txt ++++ b/projects/rocwmma/samples/CMakeLists.txt +@@ -34,7 +34,7 @@ endif() + # Custom target to build all rocWMMA samples + add_custom_target(rocwmma_samples) + +-# Regular gtests that don't require any special ++# Regular binaries that don't require any special + # designation for validation or benchmarking + function(add_rocwmma_sample TEST_TARGET TEST_SOURCE) + +diff --git a/projects/rocwmma/test/CMakeLists.txt b/projects/rocwmma/test/CMakeLists.txt +index 530884c427..0acdaa6934 100644 +--- 
a/projects/rocwmma/test/CMakeLists.txt ++++ b/projects/rocwmma/test/CMakeLists.txt +@@ -40,33 +40,38 @@ add_link_options(-mcmodel=large) + + # Test/benchmark requires additional dependencies + if(ROCWMMA_USE_SYSTEM_GOOGLETEST) +- find_package(GTest 1.12.1 REQUIRED) ++ find_package(GTest 1.16.0 REQUIRED) + else() + include( FetchContent ) + ++ # Suppress ROCMChecks WARNING on GoogleTest ++ set(ROCM_DISABLE_CHECKS FALSE) ++ macro(rocm_check_toolchain_var var access value list_file) ++ if(NOT ROCM_DISABLE_CHECKS) ++ _rocm_check_toolchain_var("${var}" "${access}" "${value}" "${list_file}") ++ endif() ++ endmacro() ++ + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git +- GIT_TAG release-1.12.1 ++ GIT_TAG v1.16.0 + ) +- FetchContent_GetProperties(googletest) +- if(NOT googletest_POPULATED) + +- # Fetch the content using default details +- FetchContent_Populate(googletest) +- # Save the shared libs setting, then force to static libs +- set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS}) +- set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE) ++ # Turn off gtest installation ++ set(INSTALL_GTEST OFF) + +- # Turn off gtest installation +- set(INSTALL_GTEST OFF) ++ # Save the shared libs setting, then force to static libs ++ set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS}) ++ set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE) + +- # Add gtest targets as static libs +- add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR}) ++ # Fetch the content using default details ++ set(ROCM_DISABLE_CHECKS TRUE) ++ FetchContent_MakeAvailable(googletest) ++ set(ROCM_DISABLE_CHECKS FALSE) + +- # Restore shared libs setting +- set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE) +- endif() ++ # Restore shared libs setting ++ set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE) + endif() + + 
set(ROCWMMA_TEST_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}) +@@ -80,6 +85,21 @@ file(WRITE "${INSTALL_TEST_FILE}" + # It differs slightly from the default testfile, and you may encounter issues because of that. + ]=] + ) ++set(INSTALL_SMOKE_TEST_FILE "${CMAKE_CURRENT_BINARY_DIR}/install_smoke_CTestTestfile.cmake") ++file(WRITE "${INSTALL_SMOKE_TEST_FILE}" ++[=[ ++# This is a test file generated by rocWMMA for install time. ++# It differs slightly from the default testfile, and you may encounter issues because of that. ++]=] ++) ++set(INSTALL_REGRESSION_TEST_FILE "${CMAKE_CURRENT_BINARY_DIR}/install_regression_CTestTestfile.cmake") ++file(WRITE "${INSTALL_REGRESSION_TEST_FILE}" ++[=[ ++# This is a test file generated by rocWMMA for install time. ++# It differs slightly from the default testfile, and you may encounter issues because of that. ++]=] ++) ++ + + # Regular gtests that don't require any special + # designation for validation or benchmarking +@@ -87,10 +107,10 @@ function(add_rocwmma_test TEST_TARGET TEST_SOURCE) + + list(APPEND TEST_SOURCE ${ARGN}) + add_executable(${TEST_TARGET} ${TEST_SOURCE}) +- target_link_libraries(${TEST_TARGET} rocwmma gtest) ++ target_link_libraries(${TEST_TARGET} rocwmma GTest::gtest) + target_link_libraries(${TEST_TARGET} OpenMP::OpenMP_CXX "-L${HIP_CLANG_ROOT}/lib") + if(UNIX) +- target_link_libraries(${TEST_TARGET} "-Wl,-rpath=$ORIGIN/../llvm/lib" "-fno-rtlib-add-rpath") ++ target_link_libraries(${TEST_TARGET} "-Wl,-rpath='\$$ORIGIN/../llvm/lib'" "-fno-rtlib-add-rpath") + endif() + target_include_directories(${TEST_TARGET} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} +@@ -139,6 +159,12 @@ function(add_rocwmma_test TEST_TARGET TEST_SOURCE) + endif() + file(APPEND "${INSTALL_TEST_FILE}" "add_test(${TEST_TARGET} \"../${EXE_NAME}\")\n") + file(APPEND "${INSTALL_TEST_FILE}" "set_tests_properties(${TEST_TARGET} PROPERTIES SKIP_REGULAR_EXPRESSION \"no ROCm-capable device;unsupported host device\")\n") ++ ++ file(APPEND 
"${INSTALL_SMOKE_TEST_FILE}" "add_test(\"${TEST_TARGET} smoke\" \"../../${EXE_NAME}\" --emulation smoke)\n") ++ file(APPEND "${INSTALL_SMOKE_TEST_FILE}" "set_tests_properties(\"${TEST_TARGET} smoke\" PROPERTIES SKIP_REGULAR_EXPRESSION \"no ROCm-capable device;unsupported host device\")\n") ++ ++ file(APPEND "${INSTALL_REGRESSION_TEST_FILE}" "add_test(\"${TEST_TARGET} regression\" \"../../${EXE_NAME}\" --emulation regression)\n") ++ file(APPEND "${INSTALL_REGRESSION_TEST_FILE}" "set_tests_properties(\"${TEST_TARGET} regression\" PROPERTIES SKIP_REGULAR_EXPRESSION \"no ROCm-capable device;unsupported host device\")\n") + endfunction() + + # Targets that implement specifically validation configuration +@@ -165,3 +191,15 @@ rocm_install( + COMPONENT tests + RENAME "CTestTestfile.cmake" + ) ++rocm_install( ++ FILES "${INSTALL_SMOKE_TEST_FILE}" ++ DESTINATION "${CMAKE_INSTALL_BINDIR}/${PROJECT_NAME}/smoke" ++ COMPONENT tests ++ RENAME "CTestTestfile.cmake" ++) ++rocm_install( ++ FILES "${INSTALL_REGRESSION_TEST_FILE}" ++ DESTINATION "${CMAKE_INSTALL_BINDIR}/${PROJECT_NAME}/regression" ++ COMPONENT tests ++ RENAME "CTestTestfile.cmake" ++) +-- +2.43.0 + diff --git a/patches/amd-mainline/rocm-libraries/0010-Fix-windows-build.patch b/patches/amd-mainline/rocm-libraries/0010-Fix-windows-build.patch new file mode 100644 index 00000000000..632aee0daa3 --- /dev/null +++ b/patches/amd-mainline/rocm-libraries/0010-Fix-windows-build.patch @@ -0,0 +1,41 @@ +From f22e5d6c531dad5aea95de3a2c0c09dd8599a1bd Mon Sep 17 00:00:00 2001 +From: Bain Syrowik +Date: Tue, 4 Nov 2025 14:55:39 -0500 +Subject: [PATCH 2/2] Fix windows build + +--- + projects/rocwmma/CMakeLists.txt | 2 +- + projects/rocwmma/test/gemm/CMakeLists.txt | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/projects/rocwmma/CMakeLists.txt b/projects/rocwmma/CMakeLists.txt +index 3ba86520..2af3a278 100644 +--- a/projects/rocwmma/CMakeLists.txt ++++ b/projects/rocwmma/CMakeLists.txt +@@ -51,7 +51,7 @@ 
set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CXX_EXTENSIONS OFF) + + #Set Clang C++ flags. +-set(CMAKE_CXX_FLAGS_DEBUG "-g -O2") # clang++ crashes without -O2 ++set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -O2") # clang++ crashes without -O2 + set(CMAKE_CXX_FLAGS_MINSIZEREL "-O2 -DNDEBUG") # clang++ failed to build the project with the default -Os + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --driver-mode=g++ -Xclang -fallow-half-arguments-and-returns -D__HIP_HCC_COMPAT_MODE__=1 -Wno-format-nonliteral -parallel-jobs=4 -fclang-abi-compat=17") + +diff --git a/projects/rocwmma/test/gemm/CMakeLists.txt b/projects/rocwmma/test/gemm/CMakeLists.txt +index 81f99984..319e35f3 100644 +--- a/projects/rocwmma/test/gemm/CMakeLists.txt ++++ b/projects/rocwmma/test/gemm/CMakeLists.txt +@@ -29,8 +29,8 @@ include( CMakeDependentOption ) + cmake_dependent_option( ROCWMMA_VALIDATE_WITH_ROCBLAS "Use rocBLAS for validation" ON "ROCWMMA_BUILD_VALIDATION_TESTS" OFF ) + cmake_dependent_option( ROCWMMA_BENCHMARK_WITH_ROCBLAS "Include rocBLAS benchmark performance comparisons" OFF "ROCWMMA_BUILD_BENCHMARK_TESTS" OFF ) + +-set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CMAKE_COMMAND} -E time") +-set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${CMAKE_COMMAND} -E time") ++set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "\"${CMAKE_COMMAND}\" -E time") ++set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "\"${CMAKE_COMMAND}\" -E time") + + if(ROCWMMA_VALIDATE_WITH_ROCBLAS OR ROCWMMA_BENCHMARK_WITH_ROCBLAS) + find_package( rocblas REQUIRED PATHS /opt/rocm /opt/rocm/rocblas $ENV{ROCBLAS_DIR} ) +-- +2.43.0 + From 39baa13f9dd99fb173dbbc28da1d6a2b16cafc97 Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Mon, 27 Oct 2025 12:02:39 -0400 Subject: [PATCH 05/13] Update rocWMMA GPU target compatibility --- cmake/therock_amdgpu_targets.cmake | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cmake/therock_amdgpu_targets.cmake b/cmake/therock_amdgpu_targets.cmake index 
8713dfcbc5a..d19c04cecea 100644 --- a/cmake/therock_amdgpu_targets.cmake +++ b/cmake/therock_amdgpu_targets.cmake @@ -47,6 +47,7 @@ therock_add_amdgpu_target(gfx906 "Radeon VII / MI50 CDNA" FAMILY dgpu-all gfx90X hipBLASLt # https://github.com/ROCm/TheRock/issues/1062 hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042 composable_kernel # https://github.com/ROCm/TheRock/issues/1245 + rocWMMA # https://github.com/ROCm/TheRock/issues/1944 ) therock_add_amdgpu_target(gfx908 "MI100 CDNA" FAMILY gfx90X-all dcgpu-all gfx90X-dcgpu EXCLUDE_TARGET_PROJECTS @@ -69,18 +70,21 @@ therock_add_amdgpu_target(gfx1010 "AMD RX 5700" FAMILY dgpu-all gfx101X-all gfx1 hipBLASLt # https://github.com/ROCm/TheRock/issues/1062 hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042 composable_kernel # https://github.com/ROCm/TheRock/issues/1245 + rocWMMA # https://github.com/ROCm/TheRock/issues/1944 ) therock_add_amdgpu_target(gfx1011 "AMD Radeon Pro V520" FAMILY dgpu-all gfx101X-all gfx101X-dgpu EXCLUDE_TARGET_PROJECTS hipBLASLt # https://github.com/ROCm/TheRock/issues/1062 hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042 composable_kernel # https://github.com/ROCm/TheRock/issues/1245 + rocWMMA # https://github.com/ROCm/TheRock/issues/1944 ) therock_add_amdgpu_target(gfx1012 "AMD RX 5500" FAMILY dgpu-all gfx101X-all gfx101X-dgpu EXCLUDE_TARGET_PROJECTS hipBLASLt # https://github.com/ROCm/TheRock/issues/1062 hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042 composable_kernel # https://github.com/ROCm/TheRock/issues/1245 + rocWMMA # https://github.com/ROCm/TheRock/issues/1944 ) # gfx103X family @@ -88,21 +92,25 @@ therock_add_amdgpu_target(gfx1030 "AMD RX 6800 / XT" FAMILY dgpu-all gfx103X-all EXCLUDE_TARGET_PROJECTS hipBLASLt # https://github.com/ROCm/TheRock/issues/1062 hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042 + rocWMMA # https://github.com/ROCm/TheRock/issues/1944 ) therock_add_amdgpu_target(gfx1032 "AMD RX 6600" FAMILY dgpu-all gfx103X-all 
gfx103X-dgpu EXCLUDE_TARGET_PROJECTS hipBLASLt # https://github.com/ROCm/TheRock/issues/1062 hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042 + rocWMMA # https://github.com/ROCm/TheRock/issues/1944 ) therock_add_amdgpu_target(gfx1035 "AMD Radeon 680M Laptop iGPU" igpu-all FAMILY gfx103X-all gfx103X-igpu EXCLUDE_TARGET_PROJECTS hipBLASLt # https://github.com/ROCm/TheRock/issues/1062 hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042 + rocWMMA # https://github.com/ROCm/TheRock/issues/1944 ) therock_add_amdgpu_target(gfx1036 "AMD Raphael iGPU" FAMILY igpu-all gfx103X-all gfx103X-igpu EXCLUDE_TARGET_PROJECTS hipBLASLt # https://github.com/ROCm/TheRock/issues/1062 hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042 + rocWMMA # https://github.com/ROCm/TheRock/issues/1944 ) # gfx110X family @@ -123,6 +131,7 @@ therock_add_amdgpu_target(gfx1103 "AMD Radeon 780M Laptop iGPU" FAMILY igpu-all EXCLUDE_TARGET_PROJECTS hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042 rccl # https://github.com/ROCm/TheRock/issues/150 + rocWMMA # https://github.com/ROCm/TheRock/issues/1944 ) # gfx115X family From 98004c99919925c9cd0c45229164c81ce53a4b43 Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Fri, 7 Nov 2025 16:48:44 -0500 Subject: [PATCH 06/13] Remove patches now that rocm-libraries is updated --- ...changes-for-integration-into-TheRock.patch | 181 ------------------ .../0010-Fix-windows-build.patch | 41 ---- 2 files changed, 222 deletions(-) delete mode 100644 patches/amd-mainline/rocm-libraries/0009-CMake-changes-for-integration-into-TheRock.patch delete mode 100644 patches/amd-mainline/rocm-libraries/0010-Fix-windows-build.patch diff --git a/patches/amd-mainline/rocm-libraries/0009-CMake-changes-for-integration-into-TheRock.patch b/patches/amd-mainline/rocm-libraries/0009-CMake-changes-for-integration-into-TheRock.patch deleted file mode 100644 index 365f6f3c3f8..00000000000 --- 
a/patches/amd-mainline/rocm-libraries/0009-CMake-changes-for-integration-into-TheRock.patch +++ /dev/null @@ -1,181 +0,0 @@ -From bcf6637ae55feb7d0340186fa985730f780dc2eb Mon Sep 17 00:00:00 2001 -From: Bain Syrowik -Date: Tue, 28 Oct 2025 13:02:46 -0400 -Subject: [PATCH 1/2] CMake changes for integration into TheRock - ---- - projects/rocwmma/CMakeLists.txt | 13 +++++ - projects/rocwmma/samples/CMakeLists.txt | 2 +- - projects/rocwmma/test/CMakeLists.txt | 74 +++++++++++++++++++------ - 3 files changed, 70 insertions(+), 19 deletions(-) - -diff --git a/projects/rocwmma/CMakeLists.txt b/projects/rocwmma/CMakeLists.txt -index 84cb179edb..3ba8652053 100644 ---- a/projects/rocwmma/CMakeLists.txt -+++ b/projects/rocwmma/CMakeLists.txt -@@ -27,6 +27,14 @@ - cmake_minimum_required( VERSION 3.14 ) - message(STATUS "CMake version: ${CMAKE_VERSION}") - -+if(NOT WIN32) -+ if(CMAKE_GENERATOR STREQUAL "Ninja") -+ message(STATUS "The CMake generator is Ninja.") -+ else() -+ message(WARNING "The CMake generator is not Ninja. Ninja is preferred over Make.") -+ endif() -+endif() -+ - # This has to be initialized before the project() command appears - # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. 
MSVC_IDE does not use CMAKE_BUILD_TYPE - if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE ) -@@ -190,6 +198,11 @@ rocm_package_add_deb_dependencies("libomp-dev") - rocm_package_add_rpm_dependencies("libomp-devel") - set(CPACK_RPM_PACKAGE_LICENSE "MIT") - -+rocm_export_targets( -+ TARGETS roc::rocwmma -+ NAMESPACE roc:: -+) -+ - rocm_create_package( - NAME rocwmma - DESCRIPTION "AMD GPU C++ library for GEMM primitives using MFMA and WMMA matrix instructions" -diff --git a/projects/rocwmma/samples/CMakeLists.txt b/projects/rocwmma/samples/CMakeLists.txt -index 4f86434355..982fbcd264 100644 ---- a/projects/rocwmma/samples/CMakeLists.txt -+++ b/projects/rocwmma/samples/CMakeLists.txt -@@ -34,7 +34,7 @@ endif() - # Custom target to build all rocWMMA samples - add_custom_target(rocwmma_samples) - --# Regular gtests that don't require any special -+# Regular binaries that don't require any special - # designation for validation or benchmarking - function(add_rocwmma_sample TEST_TARGET TEST_SOURCE) - -diff --git a/projects/rocwmma/test/CMakeLists.txt b/projects/rocwmma/test/CMakeLists.txt -index 530884c427..0acdaa6934 100644 ---- a/projects/rocwmma/test/CMakeLists.txt -+++ b/projects/rocwmma/test/CMakeLists.txt -@@ -40,33 +40,38 @@ add_link_options(-mcmodel=large) - - # Test/benchmark requires additional dependencies - if(ROCWMMA_USE_SYSTEM_GOOGLETEST) -- find_package(GTest 1.12.1 REQUIRED) -+ find_package(GTest 1.16.0 REQUIRED) - else() - include( FetchContent ) - -+ # Suppress ROCMChecks WARNING on GoogleTest -+ set(ROCM_DISABLE_CHECKS FALSE) -+ macro(rocm_check_toolchain_var var access value list_file) -+ if(NOT ROCM_DISABLE_CHECKS) -+ _rocm_check_toolchain_var("${var}" "${access}" "${value}" "${list_file}") -+ endif() -+ endmacro() -+ - FetchContent_Declare( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git -- GIT_TAG release-1.12.1 -+ GIT_TAG v1.16.0 - ) -- FetchContent_GetProperties(googletest) -- if(NOT 
googletest_POPULATED) - -- # Fetch the content using default details -- FetchContent_Populate(googletest) -- # Save the shared libs setting, then force to static libs -- set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS}) -- set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE) -+ # Turn off gtest installation -+ set(INSTALL_GTEST OFF) - -- # Turn off gtest installation -- set(INSTALL_GTEST OFF) -+ # Save the shared libs setting, then force to static libs -+ set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS}) -+ set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE) - -- # Add gtest targets as static libs -- add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR}) -+ # Fetch the content using default details -+ set(ROCM_DISABLE_CHECKS TRUE) -+ FetchContent_MakeAvailable(googletest) -+ set(ROCM_DISABLE_CHECKS FALSE) - -- # Restore shared libs setting -- set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE) -- endif() -+ # Restore shared libs setting -+ set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE) - endif() - - set(ROCWMMA_TEST_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}) -@@ -80,6 +85,21 @@ file(WRITE "${INSTALL_TEST_FILE}" - # It differs slightly from the default testfile, and you may encounter issues because of that. - ]=] - ) -+set(INSTALL_SMOKE_TEST_FILE "${CMAKE_CURRENT_BINARY_DIR}/install_smoke_CTestTestfile.cmake") -+file(WRITE "${INSTALL_SMOKE_TEST_FILE}" -+[=[ -+# This is a test file generated by rocWMMA for install time. -+# It differs slightly from the default testfile, and you may encounter issues because of that. -+]=] -+) -+set(INSTALL_REGRESSION_TEST_FILE "${CMAKE_CURRENT_BINARY_DIR}/install_regression_CTestTestfile.cmake") -+file(WRITE "${INSTALL_REGRESSION_TEST_FILE}" -+[=[ -+# This is a test file generated by rocWMMA for install time. 
-+# It differs slightly from the default testfile, and you may encounter issues because of that. -+]=] -+) -+ - - # Regular gtests that don't require any special - # designation for validation or benchmarking -@@ -87,10 +107,10 @@ function(add_rocwmma_test TEST_TARGET TEST_SOURCE) - - list(APPEND TEST_SOURCE ${ARGN}) - add_executable(${TEST_TARGET} ${TEST_SOURCE}) -- target_link_libraries(${TEST_TARGET} rocwmma gtest) -+ target_link_libraries(${TEST_TARGET} rocwmma GTest::gtest) - target_link_libraries(${TEST_TARGET} OpenMP::OpenMP_CXX "-L${HIP_CLANG_ROOT}/lib") - if(UNIX) -- target_link_libraries(${TEST_TARGET} "-Wl,-rpath=$ORIGIN/../llvm/lib" "-fno-rtlib-add-rpath") -+ target_link_libraries(${TEST_TARGET} "-Wl,-rpath='\$$ORIGIN/../llvm/lib'" "-fno-rtlib-add-rpath") - endif() - target_include_directories(${TEST_TARGET} PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR} -@@ -139,6 +159,12 @@ function(add_rocwmma_test TEST_TARGET TEST_SOURCE) - endif() - file(APPEND "${INSTALL_TEST_FILE}" "add_test(${TEST_TARGET} \"../${EXE_NAME}\")\n") - file(APPEND "${INSTALL_TEST_FILE}" "set_tests_properties(${TEST_TARGET} PROPERTIES SKIP_REGULAR_EXPRESSION \"no ROCm-capable device;unsupported host device\")\n") -+ -+ file(APPEND "${INSTALL_SMOKE_TEST_FILE}" "add_test(\"${TEST_TARGET} smoke\" \"../../${EXE_NAME}\" --emulation smoke)\n") -+ file(APPEND "${INSTALL_SMOKE_TEST_FILE}" "set_tests_properties(\"${TEST_TARGET} smoke\" PROPERTIES SKIP_REGULAR_EXPRESSION \"no ROCm-capable device;unsupported host device\")\n") -+ -+ file(APPEND "${INSTALL_REGRESSION_TEST_FILE}" "add_test(\"${TEST_TARGET} regression\" \"../../${EXE_NAME}\" --emulation regression)\n") -+ file(APPEND "${INSTALL_REGRESSION_TEST_FILE}" "set_tests_properties(\"${TEST_TARGET} regression\" PROPERTIES SKIP_REGULAR_EXPRESSION \"no ROCm-capable device;unsupported host device\")\n") - endfunction() - - # Targets that implement specifically validation configuration -@@ -165,3 +191,15 @@ rocm_install( - COMPONENT tests - RENAME 
"CTestTestfile.cmake" - ) -+rocm_install( -+ FILES "${INSTALL_SMOKE_TEST_FILE}" -+ DESTINATION "${CMAKE_INSTALL_BINDIR}/${PROJECT_NAME}/smoke" -+ COMPONENT tests -+ RENAME "CTestTestfile.cmake" -+) -+rocm_install( -+ FILES "${INSTALL_REGRESSION_TEST_FILE}" -+ DESTINATION "${CMAKE_INSTALL_BINDIR}/${PROJECT_NAME}/regression" -+ COMPONENT tests -+ RENAME "CTestTestfile.cmake" -+) --- -2.43.0 - diff --git a/patches/amd-mainline/rocm-libraries/0010-Fix-windows-build.patch b/patches/amd-mainline/rocm-libraries/0010-Fix-windows-build.patch deleted file mode 100644 index 632aee0daa3..00000000000 --- a/patches/amd-mainline/rocm-libraries/0010-Fix-windows-build.patch +++ /dev/null @@ -1,41 +0,0 @@ -From f22e5d6c531dad5aea95de3a2c0c09dd8599a1bd Mon Sep 17 00:00:00 2001 -From: Bain Syrowik -Date: Tue, 4 Nov 2025 14:55:39 -0500 -Subject: [PATCH 2/2] Fix windows build - ---- - projects/rocwmma/CMakeLists.txt | 2 +- - projects/rocwmma/test/gemm/CMakeLists.txt | 4 ++-- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/projects/rocwmma/CMakeLists.txt b/projects/rocwmma/CMakeLists.txt -index 3ba86520..2af3a278 100644 ---- a/projects/rocwmma/CMakeLists.txt -+++ b/projects/rocwmma/CMakeLists.txt -@@ -51,7 +51,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) - set(CMAKE_CXX_EXTENSIONS OFF) - - #Set Clang C++ flags. 
--set(CMAKE_CXX_FLAGS_DEBUG "-g -O2") # clang++ crashes without -O2 -+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -O2") # clang++ crashes without -O2 - set(CMAKE_CXX_FLAGS_MINSIZEREL "-O2 -DNDEBUG") # clang++ failed to build the project with the default -Os - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --driver-mode=g++ -Xclang -fallow-half-arguments-and-returns -D__HIP_HCC_COMPAT_MODE__=1 -Wno-format-nonliteral -parallel-jobs=4 -fclang-abi-compat=17") - -diff --git a/projects/rocwmma/test/gemm/CMakeLists.txt b/projects/rocwmma/test/gemm/CMakeLists.txt -index 81f99984..319e35f3 100644 ---- a/projects/rocwmma/test/gemm/CMakeLists.txt -+++ b/projects/rocwmma/test/gemm/CMakeLists.txt -@@ -29,8 +29,8 @@ include( CMakeDependentOption ) - cmake_dependent_option( ROCWMMA_VALIDATE_WITH_ROCBLAS "Use rocBLAS for validation" ON "ROCWMMA_BUILD_VALIDATION_TESTS" OFF ) - cmake_dependent_option( ROCWMMA_BENCHMARK_WITH_ROCBLAS "Include rocBLAS benchmark performance comparisons" OFF "ROCWMMA_BUILD_BENCHMARK_TESTS" OFF ) - --set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CMAKE_COMMAND} -E time") --set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${CMAKE_COMMAND} -E time") -+set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "\"${CMAKE_COMMAND}\" -E time") -+set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "\"${CMAKE_COMMAND}\" -E time") - - if(ROCWMMA_VALIDATE_WITH_ROCBLAS OR ROCWMMA_BENCHMARK_WITH_ROCBLAS) - find_package( rocblas REQUIRED PATHS /opt/rocm /opt/rocm/rocblas $ENV{ROCBLAS_DIR} ) --- -2.43.0 - From c1cb86f94f95c877c914082a9338bfe26a047d3c Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Fri, 7 Nov 2025 13:51:08 -0800 Subject: [PATCH 07/13] Update windows_support.md --- docs/development/windows_support.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/development/windows_support.md b/docs/development/windows_support.md index 017ff16e5e7..27fd3eb09b2 100644 --- a/docs/development/windows_support.md +++ b/docs/development/windows_support.md @@ -70,6 +70,7 @@ 
mainline, in open source, using MSVC, etc.). | ml-libs | [MIOpen](https://github.com/ROCm/MIOpen) | ✅ | | | ml-libs | [hipDNN](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipdnn) | ❌ | Unsupported | | ml-libs | [MIOpen Legacy Plugin](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipdnn/plugins/miopen_legacy_plugin) | ❌ | Unsupported (requires hipDNN) | + ## Building TheRock from source These instructions mostly mirror the instructions in the root From f217dcce0cc62f2c13f1c8036ffd4f105a182e50 Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Mon, 10 Nov 2025 09:59:31 -0800 Subject: [PATCH 08/13] Apply suggestions from code review Co-authored-by: Marius Brehler --- math-libs/artifact-rocwmma.toml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/math-libs/artifact-rocwmma.toml b/math-libs/artifact-rocwmma.toml index 715aa4ae3fc..813a909d069 100644 --- a/math-libs/artifact-rocwmma.toml +++ b/math-libs/artifact-rocwmma.toml @@ -8,17 +8,15 @@ include = [ [components.lib."math-libs/rocWMMA/stage"] optional = true [components.test."math-libs/rocWMMA/stage"] +optional = true include = [ # Samples "bin/simple_*", "bin/perf_*", - "bin/hipRTC_gemm", - "bin/hipRTC_gemm.exe", # Windows + "bin/hipRTC_gemm*", # Tests - "bin/*_test", - "bin/*-validate", - "bin/*_test.exe", # Windows - "bin/*-validate.exe", # Windows + "bin/*_test*", + "bin/*-validate*", "bin/rocwmma/CTestTestfile.cmake", "bin/rocwmma/*/CTestTestfile.cmake", ] From ba6606d2e033329f0fbc3dfe7c748984b2b164c0 Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Mon, 10 Nov 2025 13:11:34 -0500 Subject: [PATCH 09/13] Lower test timeout limits to be more reasonable --- build_tools/github_actions/fetch_test_configurations.py | 2 +- .../github_actions/test_executable_scripts/test_rocwmma.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build_tools/github_actions/fetch_test_configurations.py 
b/build_tools/github_actions/fetch_test_configurations.py index 9a1936e04c8..6c57d5c6908 100644 --- a/build_tools/github_actions/fetch_test_configurations.py +++ b/build_tools/github_actions/fetch_test_configurations.py @@ -208,7 +208,7 @@ def _get_script_path(script_name: str) -> str: "rocwmma": { "job_name": "rocwmma", "fetch_artifact_args": "--rocwmma --tests --blas", - "timeout_minutes": 60, + "timeout_minutes": 15, "test_script": f"python {_get_script_path('test_rocwmma.py')}", "platform": ["linux", "windows"], "total_shards": 4, diff --git a/build_tools/github_actions/test_executable_scripts/test_rocwmma.py b/build_tools/github_actions/test_executable_scripts/test_rocwmma.py index 561ce60225b..5fd391f69e2 100644 --- a/build_tools/github_actions/test_executable_scripts/test_rocwmma.py +++ b/build_tools/github_actions/test_executable_scripts/test_rocwmma.py @@ -28,7 +28,7 @@ # programatically set test_type to "regression" here. test_subdir = "" -timeout = "3600" +timeout = "900" if test_type == "smoke": # The emulator regression tests are very fast. # If we need something even faster we can use "/smoke" here. 
@@ -36,7 +36,7 @@ timeout = "300" elif test_type == "regression": test_subdir = "/regression" - timeout = "900" + timeout = "300" cmd = [ "ctest", From 867413d5a007e363e66166b871917b988e959acf Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Mon, 10 Nov 2025 14:01:44 -0500 Subject: [PATCH 10/13] Update rocWMMA config --- math-libs/CMakeLists.txt | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/math-libs/CMakeLists.txt b/math-libs/CMakeLists.txt index 5b0db1c07d5..ed67a9c26d2 100644 --- a/math-libs/CMakeLists.txt +++ b/math-libs/CMakeLists.txt @@ -276,14 +276,13 @@ if(THEROCK_ENABLE_ROCWMMA) # rocWMMA ############################################################################## + # Configure optional dependencies + set(_rocwmma_optional_deps) if(THEROCK_ENABLE_BLAS) - set(ROCWMMA_BLAS_OPTIONAL_DEP rocBLAS) + list(append _rocwmma_optional_deps rocBLAS) endif() - if(THEROCK_ROCWMMA_ENABLE_BENCHMARKS) - set(ROCWMMA_ROCM_SMI_OPTIONAL_DEP rocm_smi_lib) - else() - set(THEROCK_ROCWMMA_ENABLE_BENCHMARKS OFF) + list(append _rocwmma_optional_deps rocm_smi_lib) endif() therock_cmake_subproject_declare(rocWMMA @@ -297,8 +296,8 @@ if(THEROCK_ENABLE_ROCWMMA) "-DROCWMMA_BUILD_TESTS=$<BOOL:${THEROCK_BUILD_TESTING}>" "-DROCWMMA_VALIDATE_WITH_ROCBLAS=$<BOOL:${THEROCK_ENABLE_BLAS}>" "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=$<AND:$<BOOL:${THEROCK_ENABLE_BLAS}>,$<BOOL:${THEROCK_ROCWMMA_ENABLE_BENCHMARKS}>>" - "-DROCWMMA_BUILD_BENCHMARK_TESTS=${THEROCK_ROCWMMA_ENABLE_BENCHMARKS}" - -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON # Needed for Ninja build? 
+ "-DROCWMMA_BUILD_BENCHMARK_TESTS=$<BOOL:${THEROCK_ROCWMMA_ENABLE_BENCHMARKS}>" -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON # Needed for Ninja build -DROCWMMA_USE_SYSTEM_GOOGLETEST=ON # Use therock-googletest CMAKE_INCLUDES therock_explicit_finders.cmake @@ -307,12 +306,10 @@ if(THEROCK_ENABLE_ROCWMMA) BUILD_DEPS rocm-cmake therock-googletest - ${ROCWMMA_BLAS_OPTIONAL_DEP} - ${ROCWMMA_ROCM_SMI_OPTIONAL_DEP} + ${_rocwmma_optional_deps} RUNTIME_DEPS hip-clr - ${ROCWMMA_BLAS_OPTIONAL_DEP} - ${ROCWMMA_ROCM_SMI_OPTIONAL_DEP} + ${_rocwmma_optional_deps} ) therock_cmake_subproject_glob_c_sources(rocWMMA SUBDIRS From 0fff96b22ddf4c68a50c9da5409451039774876d Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Mon, 10 Nov 2025 15:58:47 -0500 Subject: [PATCH 11/13] Fix up rocWMMA TheRock options --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d671b962fcc..428ab57f78e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -318,7 +318,8 @@ if(NOT WIN32) cmake_dependent_option(THEROCK_MIOPEN_USE_COMPOSABLE_KERNEL "Enables composable kernel in MIOpen" ON ${THEROCK_ENABLE_COMPOSABLE_KERNEL} OFF) endif() -if(ROCWMMA_VALIDATE_WITH_ROCBLAS OR ROCWMMA_BENCHMARK_WITH_ROCBLAS) +cmake_dependent_option(THEROCK_ROCWMMA_USE_ROCBLAS "Enables rocBLAS validation in rocWMMA" ON ${THEROCK_ENABLE_BLAS} OFF) +if(THEROCK_ROCWMMA_USE_ROCBLAS) set(_rocwmma_blas_requirements "BLAS") else() set(_rocwmma_blas_requirements "") From d31bfd53aaa72593d4b75f1cf5eefc2083914107 Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Mon, 10 Nov 2025 14:29:38 -0800 Subject: [PATCH 12/13] Fix list append --- math-libs/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/math-libs/CMakeLists.txt b/math-libs/CMakeLists.txt index ed67a9c26d2..31a72fcf2cd 100644 --- a/math-libs/CMakeLists.txt +++ b/math-libs/CMakeLists.txt @@ -279,10 +279,10 @@ if(THEROCK_ENABLE_ROCWMMA) # Configure optional dependencies 
set(_rocwmma_optional_deps) if(THEROCK_ENABLE_BLAS) - 
list(append _rocwmma_optional_deps rocBLAS) + list(APPEND _rocwmma_optional_deps rocBLAS) endif() if(THEROCK_ROCWMMA_ENABLE_BENCHMARKS) - list(append _rocwmma_optional_deps rocm_smi_lib) + list(APPEND _rocwmma_optional_deps rocm_smi_lib) endif() therock_cmake_subproject_declare(rocWMMA From b40c1b3bf23f1d8248bdf403aa6b89572d4f9a08 Mon Sep 17 00:00:00 2001 From: Bain Syrowik Date: Tue, 11 Nov 2025 15:07:22 -0500 Subject: [PATCH 13/13] Clean up rocWMMA options --- CMakeLists.txt | 3 ++- math-libs/CMakeLists.txt | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 428ab57f78e..4ff5da9188d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -318,7 +318,8 @@ if(NOT WIN32) cmake_dependent_option(THEROCK_MIOPEN_USE_COMPOSABLE_KERNEL "Enables composable kernel in MIOpen" ON ${THEROCK_ENABLE_COMPOSABLE_KERNEL} OFF) endif() -cmake_dependent_option(THEROCK_ROCWMMA_USE_ROCBLAS "Enables rocBLAS validation in rocWMMA" ON ${THEROCK_ENABLE_BLAS} OFF) +cmake_dependent_option(THEROCK_ROCWMMA_USE_ROCBLAS "Enables rocBLAS validation in rocWMMA" ON "THEROCK_ENABLE_ROCWMMA;THEROCK_ENABLE_BLAS" OFF) +cmake_dependent_option(THEROCK_ROCWMMA_ENABLE_BENCHMARKS "Enables building rocWMMA benchmarks" OFF "THEROCK_ENABLE_ROCWMMA;THEROCK_BUILD_TESTING" OFF) if(THEROCK_ROCWMMA_USE_ROCBLAS) set(_rocwmma_blas_requirements "BLAS") else() diff --git a/math-libs/CMakeLists.txt b/math-libs/CMakeLists.txt index 31a72fcf2cd..c75d0c3643d 100644 --- a/math-libs/CMakeLists.txt +++ b/math-libs/CMakeLists.txt @@ -278,7 +278,7 @@ if(THEROCK_ENABLE_ROCWMMA) # Configure optional dependencies set(_rocwmma_optional_deps) - if(THEROCK_ENABLE_BLAS) + if(THEROCK_ROCWMMA_USE_ROCBLAS) list(APPEND _rocwmma_optional_deps rocBLAS) endif() if(THEROCK_ROCWMMA_ENABLE_BENCHMARKS) @@ -294,8 +294,8 @@ if(THEROCK_ENABLE_ROCWMMA) -DROCM_PATH= -DROCM_DIR= "-DROCWMMA_BUILD_TESTS=$<BOOL:${THEROCK_BUILD_TESTING}>" - 
"-DROCWMMA_VALIDATE_WITH_ROCBLAS=$<BOOL:${THEROCK_ENABLE_BLAS}>" - "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=$<AND:$<BOOL:${THEROCK_ENABLE_BLAS}>,$<BOOL:${THEROCK_ROCWMMA_ENABLE_BENCHMARKS}>>" + 
"-DROCWMMA_VALIDATE_WITH_ROCBLAS=$<BOOL:${THEROCK_ROCWMMA_USE_ROCBLAS}>" + "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=$<AND:$<BOOL:${THEROCK_ROCWMMA_USE_ROCBLAS}>,$<BOOL:${THEROCK_ROCWMMA_ENABLE_BENCHMARKS}>>" "-DROCWMMA_BUILD_BENCHMARK_TESTS=$<BOOL:${THEROCK_ROCWMMA_ENABLE_BENCHMARKS}>" -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON # Needed for Ninja build -DROCWMMA_USE_SYSTEM_GOOGLETEST=ON # Use therock-googletest @@ -315,6 +315,7 @@ if(THEROCK_ENABLE_ROCWMMA) SUBDIRS . ) + therock_cmake_subproject_provide_package(rocWMMA rocwmma lib/cmake/rocwmma) therock_cmake_subproject_activate(rocWMMA) therock_provide_artifact(rocwmma