Skip to content
Merged
13 changes: 13 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,19 @@ if(NOT WIN32)
cmake_dependent_option(THEROCK_MIOPEN_USE_COMPOSABLE_KERNEL "Enables composable kernel in MIOpen" ON ${THEROCK_ENABLE_COMPOSABLE_KERNEL} OFF)
endif()

# rocWMMA feature and its dependent options.
#
# THEROCK_ROCWMMA_USE_ROCBLAS defaults to ON when both rocWMMA and BLAS are
# enabled and is forced OFF otherwise. THEROCK_ROCWMMA_ENABLE_BENCHMARKS is
# only user-selectable (default OFF) when rocWMMA and testing are both enabled.
#
# NOTE(review): THEROCK_ENABLE_ROCWMMA is consulted by the dependent options
# before the therock_add_feature(ROCWMMA) call further down - confirm that the
# variable is already defined at this point on a first configure.
cmake_dependent_option(
  THEROCK_ROCWMMA_USE_ROCBLAS
  "Enables rocBLAS validation in rocWMMA"
  ON
  "THEROCK_ENABLE_ROCWMMA;THEROCK_ENABLE_BLAS"
  OFF
)
cmake_dependent_option(
  THEROCK_ROCWMMA_ENABLE_BENCHMARKS
  "Enables building rocWMMA benchmarks"
  OFF
  "THEROCK_ENABLE_ROCWMMA;THEROCK_BUILD_TESTING"
  OFF
)

# BLAS only becomes a requirement of the feature when rocBLAS validation is on.
set(_rocwmma_blas_requirements "")
if(THEROCK_ROCWMMA_USE_ROCBLAS)
  list(APPEND _rocwmma_blas_requirements "BLAS")
endif()

therock_add_feature(ROCWMMA
  GROUP MATH_LIBS
  DESCRIPTION "Enables rocWMMA"
  REQUIRES COMPILER HIP_RUNTIME ${_rocwmma_blas_requirements}
)

# ML-Libs Features.
therock_add_feature(MIOPEN
GROUP ML_LIBS
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ minimal build):
| `-DTHEROCK_ENABLE_MIOPEN=ON` | Enables MIOpen |
| `-DTHEROCK_ENABLE_MIOPEN_PLUGIN=ON` | Enables MIOpen_plugin |
| `-DTHEROCK_ENABLE_HIPDNN=ON` | Enables hipDNN |
| `-DTHEROCK_ENABLE_ROCWMMA=ON` | Enables rocWMMA |

> [!TIP]
> Enabling any features will implicitly enable their *minimum* dependencies. Some
Expand Down
9 changes: 9 additions & 0 deletions build_tools/github_actions/fetch_test_configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,15 @@ def _get_script_path(script_name: str) -> str:
"platform": ["linux"],
"total_shards": 1,
},
# rocWMMA tests
"rocwmma": {
"job_name": "rocwmma",
"fetch_artifact_args": "--rocwmma --tests --blas",
"timeout_minutes": 15,
"test_script": f"python {_get_script_path('test_rocwmma.py')}",
"platform": ["linux", "windows"],
"total_shards": 4,
},
}


Expand Down
58 changes: 58 additions & 0 deletions build_tools/github_actions/test_executable_scripts/test_rocwmma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import logging
import os
import shlex
import subprocess
from pathlib import Path

# Settings read from the CI environment. Each value is None when the
# corresponding variable is unset, except `platform` (see below).
THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR")
AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES")
# RUNNER_OS may be unset (for example when the script is run by hand), so
# default to an empty string instead of calling .lower() on None.
platform = os.getenv("RUNNER_OS", "").lower()
# Directory containing this script, and the directory three levels above it,
# which is used as the working directory for the test command.
SCRIPT_DIR = Path(__file__).resolve().parent
THEROCK_DIR = SCRIPT_DIR.parent.parent.parent

# GTest sharding
# SHARD_INDEX and TOTAL_SHARDS both default to 1 (a single shard) when unset.
SHARD_INDEX = os.getenv("SHARD_INDEX", 1)
TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1)
# Work on a copy so the sharding variables are only added to the environment
# handed to the test command, not to this process's own environment.
environ_vars = os.environ.copy()
# For display purposes in the GitHub Actions UI, the shard array is 1-indexed.
# GTest shard indexes are 0-indexed, so convert by subtracting one.
environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1)
environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS)

logging.basicConfig(level=logging.INFO)

# TEST_TYPE selects which suite is run; it defaults to "full" (the normal
# test suite) when the variable is not set.
test_type = os.getenv("TEST_TYPE", "full")

# If there are devices for which the full set is too slow, we can
# programmatically set test_type to "regression" here.

# "smoke" currently maps onto the regression suite: the emulator regression
# tests are very fast. If something even faster is needed, "/smoke" can be
# used as the sub-directory for the smoke case instead.
if test_type in ("smoke", "regression"):
    test_subdir, timeout = "/regression", "300"
else:
    # Full suite: run from the top-level rocwmma test directory with the
    # longer per-test timeout.
    test_subdir, timeout = "", "900"

# Assemble the ctest command line: run the tests found under the selected
# rocwmma test directory, eight at a time, printing output only for failures
# and applying the per-test timeout (in seconds) chosen above.
ctest_dir = f"{THEROCK_BIN_DIR}/rocwmma{test_subdir}"
cmd = ["ctest", "--test-dir", ctest_dir, "--output-on-failure"]
cmd += ["--parallel", "8"]
cmd += ["--timeout", timeout]
logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}")

# check=True makes a non-zero ctest exit status raise CalledProcessError, so a
# failing test run fails this script as well.
subprocess.run(cmd, cwd=THEROCK_DIR, env=environ_vars, check=True)
23 changes: 21 additions & 2 deletions build_tools/install_rocm_from_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,17 @@
(--artifact-group ARTIFACT_GROUP | --amdgpu_family AMDGPU_FAMILY)
[--output-dir OUTPUT_DIR]
(--run-id RUN_ID | --release RELEASE | --input-dir INPUT_DIR)
[--blas | --no-blas] [--fft | --no-fft] [--hipdnn | --no-hipdnn] [--miopen | --no-miopen] [--miopen-plugin | --no-miopen-plugin]
[--prim | --no-prim] [--rand | --no-rand] [--rccl | --no-rccl] [--tests | --no-tests] [--base-only]
[--blas | --no-blas]
[--fft | --no-fft]
[--hipdnn | --no-hipdnn]
[--miopen | --no-miopen]
[--miopen-plugin | --no-miopen-plugin]
[--prim | --no-prim]
[--rand | --no-rand]
[--rccl | --no-rccl]
[--rocwmma | --no-rocwmma]
[--tests | --no-tests]
[--base-only]

Examples:
- Downloads and unpacks the gfx94X S3 artifacts from GitHub CI workflow run 14474448215
Expand Down Expand Up @@ -170,6 +179,7 @@ def retrieve_artifacts_by_run_id(args):
args.prim,
args.rand,
args.rccl,
args.rocwmma,
]
):
argv.extend(base_artifact_patterns)
Expand All @@ -193,6 +203,8 @@ def retrieve_artifacts_by_run_id(args):
extra_artifacts.append("rand")
if args.rccl:
extra_artifacts.append("rccl")
if args.rocwmma:
extra_artifacts.append("rocwmma")

extra_artifact_patterns = [f"{a}_lib" for a in extra_artifacts]
if args.tests:
Expand Down Expand Up @@ -374,6 +386,13 @@ def main(argv):
action=argparse.BooleanOptionalAction,
)

artifacts_group.add_argument(
"--rocwmma",
default=False,
help="Include 'rocwmma' artifacts",
action=argparse.BooleanOptionalAction,
)

artifacts_group.add_argument(
"--tests",
default=False,
Expand Down
9 changes: 9 additions & 0 deletions cmake/therock_amdgpu_targets.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ therock_add_amdgpu_target(gfx906 "Radeon VII / MI50 CDNA" FAMILY dgpu-all gfx90X
hipBLASLt # https://github.com/ROCm/TheRock/issues/1062
hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042
composable_kernel # https://github.com/ROCm/TheRock/issues/1245
rocWMMA # https://github.com/ROCm/TheRock/issues/1944
)
therock_add_amdgpu_target(gfx908 "MI100 CDNA" FAMILY gfx90X-all dcgpu-all gfx90X-dcgpu
EXCLUDE_TARGET_PROJECTS
Expand All @@ -69,40 +70,47 @@ therock_add_amdgpu_target(gfx1010 "AMD RX 5700" FAMILY dgpu-all gfx101X-all gfx1
hipBLASLt # https://github.com/ROCm/TheRock/issues/1062
hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042
composable_kernel # https://github.com/ROCm/TheRock/issues/1245
rocWMMA # https://github.com/ROCm/TheRock/issues/1944
)
# gfx1011: member of the dgpu-all, gfx101X-all and gfx101X-dgpu families.
# The listed projects are excluded for this target; each entry links the
# tracking issue.
therock_add_amdgpu_target(gfx1011 "AMD Radeon Pro V520"
  FAMILY
    dgpu-all
    gfx101X-all
    gfx101X-dgpu
  EXCLUDE_TARGET_PROJECTS
    hipBLASLt          # https://github.com/ROCm/TheRock/issues/1062
    hipSPARSELt        # https://github.com/ROCm/TheRock/issues/2042
    composable_kernel  # https://github.com/ROCm/TheRock/issues/1245
    rocWMMA            # https://github.com/ROCm/TheRock/issues/1944
)

# gfx1012: same families and exclusions as gfx1011.
therock_add_amdgpu_target(gfx1012 "AMD RX 5500"
  FAMILY
    dgpu-all
    gfx101X-all
    gfx101X-dgpu
  EXCLUDE_TARGET_PROJECTS
    hipBLASLt          # https://github.com/ROCm/TheRock/issues/1062
    hipSPARSELt        # https://github.com/ROCm/TheRock/issues/2042
    composable_kernel  # https://github.com/ROCm/TheRock/issues/1245
    rocWMMA            # https://github.com/ROCm/TheRock/issues/1944
)

# gfx103X family
#
# Each call declares one GPU target, the families it belongs to (everything
# after the FAMILY keyword) and the projects that are excluded for that target
# (everything after EXCLUDE_TARGET_PROJECTS, with the tracking issue linked).
therock_add_amdgpu_target(gfx1030 "AMD RX 6800 / XT" FAMILY dgpu-all gfx103X-all gfx103X-dgpu
  EXCLUDE_TARGET_PROJECTS
    hipBLASLt  # https://github.com/ROCm/TheRock/issues/1062
    hipSPARSELt  # https://github.com/ROCm/TheRock/issues/2042
    rocWMMA  # https://github.com/ROCm/TheRock/issues/1944
)
therock_add_amdgpu_target(gfx1032 "AMD RX 6600" FAMILY dgpu-all gfx103X-all gfx103X-dgpu
  EXCLUDE_TARGET_PROJECTS
    hipBLASLt  # https://github.com/ROCm/TheRock/issues/1062
    hipSPARSELt  # https://github.com/ROCm/TheRock/issues/2042
    rocWMMA  # https://github.com/ROCm/TheRock/issues/1944
)
# The family list must follow the FAMILY keyword: `igpu-all` previously sat in
# front of it, so it was not passed as one of this target's families, unlike
# the otherwise identical gfx1036 declaration.
therock_add_amdgpu_target(gfx1035 "AMD Radeon 680M Laptop iGPU" FAMILY igpu-all gfx103X-all gfx103X-igpu
  EXCLUDE_TARGET_PROJECTS
    hipBLASLt  # https://github.com/ROCm/TheRock/issues/1062
    hipSPARSELt  # https://github.com/ROCm/TheRock/issues/2042
    rocWMMA  # https://github.com/ROCm/TheRock/issues/1944
)
therock_add_amdgpu_target(gfx1036 "AMD Raphael iGPU" FAMILY igpu-all gfx103X-all gfx103X-igpu
  EXCLUDE_TARGET_PROJECTS
    hipBLASLt  # https://github.com/ROCm/TheRock/issues/1062
    hipSPARSELt  # https://github.com/ROCm/TheRock/issues/2042
    rocWMMA  # https://github.com/ROCm/TheRock/issues/1944
)

# gfx110X family
Expand All @@ -123,6 +131,7 @@ therock_add_amdgpu_target(gfx1103 "AMD Radeon 780M Laptop iGPU" FAMILY igpu-all
EXCLUDE_TARGET_PROJECTS
hipSPARSELt # https://github.com/ROCm/TheRock/issues/2042
rccl # https://github.com/ROCm/TheRock/issues/150
rocWMMA # https://github.com/ROCm/TheRock/issues/1944
)

# gfx115X family
Expand Down
1 change: 1 addition & 0 deletions docs/development/installing_artifacts.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The script supports the following command-line options:
| `--prim` | Flag | Include primitives artifacts |
| `--rand` | Flag | Include random number generator artifacts |
| `--rccl` | Flag | Include RCCL artifacts |
| `--rocwmma` | Flag | Include rocWMMA artifacts |
| `--release` | String | Release version from nightly or dev tarballs |
| `--run-id` | String | GitHub CI workflow run ID to install from |
| `--tests` | Flag | Include test artifacts for enabled components |
Expand Down
1 change: 1 addition & 0 deletions docs/development/windows_support.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ mainline, in open source, using MSVC, etc.).
| math-libs (BLAS) | [rocSOLVER](https://github.com/ROCm/rocSOLVER) | ✅ | |
| math-libs (BLAS) | [hipSOLVER](https://github.com/ROCm/hipSOLVER) | ✅ | |
| math-libs (BLAS) | [hipBLAS](https://github.com/ROCm/hipBLAS) | ✅ | |
| math-libs | [rocWMMA](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocwmma) | ✅ | |
| | | | |
| ml-libs | [Composable Kernel](https://github.com/ROCm/composable_kernel) | ❌ | Unsupported |
| ml-libs | [MIOpen](https://github.com/ROCm/MIOpen) | ✅ | |
Expand Down
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ add_test(
"-DTHEROCK_ENABLE_PRIM=${THEROCK_ENABLE_PRIM}"
"-DTHEROCK_ENABLE_RAND=${THEROCK_ENABLE_RAND}"
"-DTHEROCK_ENABLE_RCCL=${THEROCK_ENABLE_RCCL}"
"-DTHEROCK_ENABLE_ROCWMMA=${THEROCK_ENABLE_ROCWMMA}"
"-DTHEROCK_ENABLE_SOLVER=${THEROCK_ENABLE_SOLVER}"
"-DTHEROCK_ENABLE_SPARSE=${THEROCK_ENABLE_SPARSE}"
${HIP_CMAKE_ARGS}
Expand Down
1 change: 1 addition & 0 deletions examples/clean_configure_test_project.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ set(propagate_vars
THEROCK_ENABLE_PRIM
THEROCK_ENABLE_RAND
THEROCK_ENABLE_RCCL
THEROCK_ENABLE_ROCWMMA
THEROCK_ENABLE_SOLVER
THEROCK_ENABLE_SPARSE
CMAKE_HIP_PLATFORM
Expand Down
6 changes: 6 additions & 0 deletions examples/cpp-sdk-user/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ option(THEROCK_ENABLE_MIOPEN "Whether MIOpen is available" ON)
option(THEROCK_ENABLE_PRIM "Whether rocprim is available" ON)
option(THEROCK_ENABLE_RAND "Whether rocrand/hiprand are available" ON)
option(THEROCK_ENABLE_RCCL "Whether rccl is available" ON)
option(THEROCK_ENABLE_ROCWMMA "Whether rocwmma is available" ON)
option(THEROCK_ENABLE_SOLVER "Whether rocsolver/hipsolver are available" ON)
option(THEROCK_ENABLE_SPARSE "Whether rocsparse/hipsparse/hipsparselt are available" ON)
option(ENABLE_DEVICE_TEST "Whether to enable testing that requires a device" OFF)
Expand Down Expand Up @@ -144,3 +145,8 @@ if(THEROCK_ENABLE_HIPDNN)
find_package(hipdnn_backend CONFIG REQUIRED)
message(STATUS "hipdnn version: ${hipdnn_VERSION}")
endif()

# When rocWMMA is marked as available, require its CMake package config to be
# found and report the version that was found.
if(THEROCK_ENABLE_ROCWMMA)
  find_package(rocWMMA REQUIRED CONFIG)
  message(STATUS "rocWMMA version: ${rocWMMA_VERSION}")
endif()
59 changes: 59 additions & 0 deletions math-libs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,62 @@ if(THEROCK_ENABLE_BLAS)
add_subdirectory(support)
add_subdirectory(BLAS)
endif()

if(THEROCK_ENABLE_ROCWMMA)
  ##############################################################################
  # rocWMMA
  ##############################################################################

  # Configure optional dependencies: rocBLAS is added when rocBLAS validation
  # is enabled, rocm_smi_lib when benchmarks are built. The resulting list is
  # passed as both BUILD_DEPS and RUNTIME_DEPS below.
  set(_rocwmma_optional_deps)
  if(THEROCK_ROCWMMA_USE_ROCBLAS)
    list(APPEND _rocwmma_optional_deps rocBLAS)
  endif()
  if(THEROCK_ROCWMMA_ENABLE_BENCHMARKS)
    list(APPEND _rocwmma_optional_deps rocm_smi_lib)
  endif()

  # Declare the rocWMMA sub-project. Tests follow THEROCK_BUILD_TESTING;
  # rocBLAS-backed benchmarking is only requested when both rocBLAS validation
  # and benchmarks are enabled.
  therock_cmake_subproject_declare(rocWMMA
    EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_LIBRARIES_SOURCE_DIR}/projects/rocwmma"
    BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/rocWMMA"
    BACKGROUND_BUILD
    CMAKE_ARGS
      -DHIP_PLATFORM=amd
      -DROCM_PATH=
      -DROCM_DIR=
      "-DROCWMMA_BUILD_TESTS=$<BOOL:${THEROCK_BUILD_TESTING}>"
      "-DROCWMMA_VALIDATE_WITH_ROCBLAS=$<BOOL:${THEROCK_ROCWMMA_USE_ROCBLAS}>"
      "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=$<AND:$<BOOL:${THEROCK_ROCWMMA_USE_ROCBLAS}>,$<BOOL:${THEROCK_ROCWMMA_ENABLE_BENCHMARKS}>>"
      "-DROCWMMA_BUILD_BENCHMARK_TESTS=$<BOOL:${THEROCK_ROCWMMA_ENABLE_BENCHMARKS}>"
      -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON  # Needed for Ninja build
      -DROCWMMA_USE_SYSTEM_GOOGLETEST=ON  # Use therock-googletest
    CMAKE_INCLUDES
      therock_explicit_finders.cmake
    COMPILER_TOOLCHAIN
      amd-hip
    BUILD_DEPS
      rocm-cmake
      therock-googletest
      ${_rocwmma_optional_deps}
    RUNTIME_DEPS
      hip-clr
      ${_rocwmma_optional_deps}
  )
  therock_cmake_subproject_glob_c_sources(rocWMMA
    SUBDIRS
      .
  )
  therock_cmake_subproject_provide_package(rocWMMA rocwmma lib/cmake/rocwmma)
  therock_cmake_subproject_activate(rocWMMA)

  # Package the sub-project's output as the `rocwmma` artifact, split into the
  # components described by artifact-rocwmma.toml.
  therock_provide_artifact(rocwmma
    DESCRIPTOR artifact-rocwmma.toml
    COMPONENTS
      dbg
      dev
      lib
      test
    SUBPROJECT_DEPS
      rocWMMA
  )
endif()  # THEROCK_ENABLE_ROCWMMA
22 changes: 22 additions & 0 deletions math-libs/artifact-rocwmma.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# rocWMMA
# Component layout of the rocwmma artifact. Every table is keyed by the
# sub-project stage directory "math-libs/rocWMMA/stage".

[components.dbg."math-libs/rocWMMA/stage"]
optional = true

# Development files: the rocWMMA headers.
[components.dev."math-libs/rocWMMA/stage"]
include = [
  "include/rocwmma/*",
]

[components.lib."math-libs/rocWMMA/stage"]
optional = true

# Sample and test executables plus the CTest files used to run them.
[components.test."math-libs/rocWMMA/stage"]
optional = true
include = [
  # Samples
  "bin/simple_*",
  "bin/perf_*",
  "bin/hipRTC_gemm*",
  # Tests
  "bin/*_test*",
  "bin/*-validate*",
  "bin/rocwmma/CTestTestfile.cmake",
  "bin/rocwmma/*/CTestTestfile.cmake",
]
Loading