Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
9f58748
[third-party] Add AOCL-BLAS as alternative CPU reference BLAS for tes…
tony-davis Jan 21, 2026
d92a20d
Fix AOCL-BLAS header installation and dependency ordering
tony-davis Jan 22, 2026
43544f3
Add Tensile artifact cleanup to rocBLAS expunge
tony-davis Jan 22, 2026
36f2757
[third-party] Add AOCL-BLAS as alternative CPU reference BLAS for tes…
tony-davis Jan 21, 2026
e56e84b
Fix AOCL-BLAS header installation to match OpenBLAS pattern
tony-davis Jan 22, 2026
ed444d8
Fix trailing whitespace and apply black formatting
tony-davis Jan 22, 2026
5c3a9eb
Address Copilot review feedback
tony-davis Jan 23, 2026
cf4890b
Apply black formatting to validate_static_library.py
tony-davis Jan 23, 2026
0169599
Remove rocBLAS+clean dependency - target doesn't exist
tony-davis Jan 23, 2026
719b09e
Address Copilot PR review feedback (round 2)
tony-davis Jan 23, 2026
3782d9b
Fix trailing whitespace in aocl/CMakeLists.txt
tony-davis Jan 23, 2026
7291295
Fix AOCL critical header validation - remove non-existent blis64.h
tony-davis Jan 23, 2026
37ddf3f
Add temporary libcblas.a symlink for rocBLAS compatibility
tony-davis Jan 23, 2026
097c219
Fix Windows build - exclude AOCL-BLAS dependencies on Windows
tony-davis Jan 23, 2026
a7dc777
Copy AOCL to rocBLAS build deps location for discovery
tony-davis Jan 23, 2026
0c41523
Move AOCL copy to install phase for single-pass build
tony-davis Jan 23, 2026
03360e2
Enable host math libraries when testing is enabled
tony-davis Jan 23, 2026
02cc76e
Set BUILD_DIR for rocBLAS to enable AOCL discovery
tony-davis Jan 23, 2026
6550819
Force Release build for AOCL to fix catastrophic performance issue
tony-davis Jan 23, 2026
cd71995
Apply black formatting to test_rocblas.py
tony-davis Jan 26, 2026
71582ad
Set OMP_NUM_THREADS=48 to prevent AOCL thread oversubscription in CI
tony-davis Jan 27, 2026
f4995cd
[third-party] Update AOCL CMake package to use standard uppercase con…
tony-davis Jan 28, 2026
f96ed04
Remove AOCL_CLEANUP_TASK.md (not part of this PR)
tony-davis Jan 28, 2026
455cfeb
Enable AOCL-BLAS on Windows with system compiler
tony-davis Feb 2, 2026
5de6077
Merge remote-tracking branch 'origin/main' into users/todavis/aocl-ho…
tony-davis Feb 6, 2026
8c59fee
Make amdsmi truly optional for BLAS (hipBLASLt, hipSPARSELt)
tony-davis Feb 6, 2026
81502f5
AOCL-BLAS integration: stage-based find_package and runtime-only rocB…
tony-davis Feb 9, 2026
96bcf98
Merge branch 'main' into users/todavis/aocl-host-blas
tony-davis Feb 9, 2026
5206eb7
Trim rocBLAS test script: OMP_NUM_THREADS only, no diagnostics
tony-davis Feb 9, 2026
ceb811e
Revert build deps to THEROCK_DIST_DIR in therock_subproject
tony-davis Feb 9, 2026
d874acd
Remove Tensile expunge target from BLAS CMakeLists
tony-davis Feb 9, 2026
7734598
amdsmi package path and optional core/rocrtst integration
tony-davis Feb 18, 2026
e472ae9
Merge main into users/todavis/aocl-host-blas
tony-davis Feb 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion BUILD_TOPOLOGY.toml
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,13 @@ type = "target-neutral"
artifact_deps = []
feature_group = "HOST_MATH" # Controlled by THEROCK_ENABLE_HOST_MATH (OFF by default)

[artifacts.host-aocl-blas]
artifact_group = "third-party-libs"
type = "target-neutral"
artifact_deps = [] # No dependencies - uses system compiler like OpenBLAS
feature_group = "HOST_MATH" # Controlled by THEROCK_ENABLE_HOST_MATH (OFF by default)
# AOCL-BLAS provides BLIS/BLAS/CBLAS functions for CPU reference BLAS in testing

[artifacts.host-suite-sparse]
artifact_group = "third-party-libs"
type = "target-neutral"
Expand Down Expand Up @@ -494,7 +501,7 @@ feature_group = "CORE" # Part of core, enabled by default
[artifacts.blas]
artifact_group = "math-libs"
type = "target-specific"
artifact_deps = ["core-runtime", "core-hip", "core-amdsmi", "host-blas", "host-suite-sparse", "rocprofiler-sdk"]
artifact_deps = ["core-runtime", "core-hip", "core-amdsmi", "host-blas", "host-aocl-blas", "host-suite-sparse", "rocprofiler-sdk"]
split_databases = ["rocblas", "hipblaslt"]

[artifacts.fft]
Expand Down
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME AND NOT DEFINED THEROCK_BUILD_TESTIN
endif()
if(THEROCK_BUILD_TESTING)
message(STATUS "Enabling building tests")
# Enable host math libraries (OpenBLAS, AOCL-BLAS, SuiteSparse) when testing is enabled
# These provide CPU reference BLAS implementations for rocBLAS clients/tests
if(NOT DEFINED THEROCK_ENABLE_HOST_MATH)
set(THEROCK_ENABLE_HOST_MATH ON)
message(STATUS "Enabling host math libraries for testing")
endif()
endif()

################################################################################
Expand Down
1 change: 0 additions & 1 deletion base/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ therock_cmake_subproject_provide_package(rocm-core rocm-core lib/cmake/rocm-core
therock_cmake_subproject_activate(rocm-core)



################################################################################
# rocm_smi_lib
################################################################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@

logging.basicConfig(level=logging.INFO)

# Default OMP_NUM_THREADS to 48 for AOCL-BLAS when it is not already set, to avoid the 60-100x slowdown from thread oversubscription in CI.
if "OMP_NUM_THREADS" not in environ_vars:
environ_vars["OMP_NUM_THREADS"] = "48"
logging.info("Set OMP_NUM_THREADS=48 for rocBLAS/AOCL tests")

# If smoke tests are enabled, we run smoke tests only.
# Otherwise, we run the normal test suite
test_type = os.getenv("TEST_TYPE", "full")
Expand All @@ -41,5 +46,6 @@
subprocess.run(
cmd,
cwd=THEROCK_DIR,
env=environ_vars,
check=True,
)
56 changes: 56 additions & 0 deletions build_tools/validate_static_library.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python
"""Validates that a static library exists and is a valid archive."""

import argparse
import os
import subprocess
import sys


def run(args: argparse.Namespace):
    """Validates each path in ``args.static_libs`` as a static library archive.

    A one-line status is printed per library. Validation prefers listing the
    archive members with ``ar t``; when the ``ar`` tool is not installed, the
    file's leading bytes are checked against the ar archive magic instead, so
    an arbitrary non-empty file is no longer reported as a valid library.

    Args:
        args: Parsed arguments; ``args.static_libs`` is an iterable of paths.

    Raises:
        SystemExit: With status 1 on the first library that fails validation.
    """
    # Signatures that open a regular ar archive and a GNU thin archive.
    archive_magics = (b"!<arch>\n", b"!<thin>\n")

    for static_lib in args.static_libs:
        print(f"Validating static library: {static_lib}", end="")

        # Check if file exists
        if not os.path.isfile(static_lib):
            print(" : ERROR - File does not exist")
            sys.exit(1)

        # Check if it's a valid archive using 'ar'
        try:
            result = subprocess.run(
                ["ar", "t", static_lib],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            # Decode leniently: the tool's diagnostics are only echoed back.
            details = e.stderr.decode(errors="replace")
            print(f" : ERROR - Not a valid archive: {details}")
            sys.exit(1)
        except FileNotFoundError:
            # 'ar' command not found: fall back to size + magic header checks.
            size_bytes = os.path.getsize(static_lib)
            if size_bytes == 0:
                print(" : ERROR - File size is 0 MB, not a valid static library")
                sys.exit(1)
            with open(static_lib, "rb") as lib_file:
                magic = lib_file.read(len(archive_magics[0]))
            if magic not in archive_magics:
                print(" : ERROR - Not a valid archive: missing ar magic header")
                sys.exit(1)
            size_mb = size_bytes / (1024 * 1024)
            print(f" : OK ({size_mb:.1f} MB)")
            continue

        # Filter empty strings to correctly count object files
        object_files = [
            line
            for line in result.stdout.decode(errors="replace").splitlines()
            if line
        ]
        num_objects = len(object_files)
        if num_objects == 0:
            print(" : ERROR - Archive is empty (0 object files)")
            sys.exit(1)
        print(f" : OK (contains {num_objects} object files)")


def main(argv):
    """Command-line entry point.

    Parses ``argv`` (zero or more static library paths) and hands the parsed
    namespace to ``run`` for validation.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "static_libs",
        nargs="*",
        help="Static libraries to validate",
    )
    parsed_args = parser.parse_args(argv)
    run(parsed_args)


# Only validate when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv[1:])
37 changes: 37 additions & 0 deletions cmake/therock_subproject.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,43 @@ function(therock_cmake_subproject_activate target_name)
list(APPEND _build_env_pairs "--unset=HIP_PATH")
list(APPEND _build_env_pairs "--unset=HIP_DIR")

# On Windows, preserve critical MSVC and Windows SDK environment variables
# These are needed for CMake's compiler detection and FindOpenMP to work correctly
# in nested CMake invocations (like AOCL's build system).
if(WIN32)
# Preserve key Windows SDK and MSVC environment variables
foreach(_win_env_var
PATH
WindowsSdkDir
WindowsSDKVersion
WindowsSDKLibVersion
WindowsLibPath
VCToolsInstallDir
VCToolsRedistDir
UniversalCRTSdkDir
UCRTVersion
INCLUDE
LIB
LIBPATH
DevEnvDir
VisualStudioVersion
VSCMD_VER
VSCMD_ARG_HOST_ARCH
VSCMD_ARG_TGT_ARCH
)
if(DEFINED ENV{${_win_env_var}})
# Escape semicolons in the environment variable value to prevent CMake from
# treating them as list separators (critical for PATH, INCLUDE, LIB, etc.)
string(REPLACE ";" "\\;" _escaped_value "$ENV{${_win_env_var}}")
# Remove trailing backslashes to prevent them from escaping the closing quote
# This is critical for paths like "C:\Program Files\...\10\" where the trailing
# backslash would escape the quote and break argument parsing in cmake -E env
string(REGEX REPLACE "\\\\+$" "" _escaped_value "${_escaped_value}")
list(APPEND _build_env_pairs "${_win_env_var}=${_escaped_value}")
endif()
endforeach()
endif()

# Handle compiler toolchain.
set(_compiler_toolchain_addl_depends)
set(_compiler_toolchain_init_contents)
Expand Down
35 changes: 35 additions & 0 deletions cmake/therock_testing.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,38 @@ function(therock_test_validate_shared_lib)
)
endforeach()
endfunction()

# Adds one CTest test per static library located under a common path.
# Each test runs build_tools/validate_static_library.py on "<PATH>/<lib name>"
# and is named therock-validate-static-lib-<lib name>.
#
# Arguments:
#   PATH: Common path (relative to CMAKE_CURRENT_BINARY_DIR if not absolute)
#   LIB_NAMES: Library names to validate
#
# No tests are added for sanitizer builds or on Windows.
function(therock_test_validate_static_lib)
  cmake_parse_arguments(
    PARSE_ARGV 0 ARG
    ""
    "PATH"
    "LIB_NAMES"
  )

  # Skip static library validation for sanitizer builds (matches shared lib behavior).
  if(NOT "${THEROCK_SANITIZER}" STREQUAL "")
    return()
  endif()

  if(WIN32)
    # This helper is Linux only.
    return()
  endif()

  # if(IS_ABSOLUTE) tests the path string it is given and does not dereference
  # variable names, so the value must be expanded explicitly. cmake_path(), by
  # contrast, takes the variable name and updates it in place.
  if(NOT IS_ABSOLUTE "${ARG_PATH}")
    cmake_path(ABSOLUTE_PATH ARG_PATH BASE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
  endif()

  foreach(lib_name ${ARG_LIB_NAMES})
    add_test(
      NAME therock-validate-static-lib-${lib_name}
      COMMAND
        "${Python3_EXECUTABLE}" "${THEROCK_SOURCE_DIR}/build_tools/validate_static_library.py"
        "${ARG_PATH}/${lib_name}"
    )
  endforeach()
endfunction()
13 changes: 9 additions & 4 deletions math-libs/BLAS/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,14 @@ list(APPEND _blas_subproject_names hipBLASLt)

set(rocBLAS_optional_runtime_deps)
if(NOT WIN32)
# rocBLAS is hard-coded to not expect rocm-smi.
if(TARGET rocm_smi_lib)
list(APPEND rocBLAS_optional_runtime_deps rocm_smi_lib)
endif()
elseif(THEROCK_BUILD_TESTING)
list(APPEND rocBLAS_optional_runtime_deps therock-host-blas)
endif()
if(THEROCK_BUILD_TESTING)
# CPU reference BLAS libraries for rocBLAS clients/tests (mirror OpenBLAS: runtime dep only)
list(APPEND rocBLAS_optional_runtime_deps therock-host-blas) # OpenBLAS
list(APPEND rocBLAS_optional_runtime_deps therock-aocl-blas) # AOCL-BLAS (Linux & Windows)
endif()

therock_cmake_subproject_declare(rocBLAS
Expand All @@ -183,6 +185,9 @@ therock_cmake_subproject_declare(rocBLAS
-DROCM_PATH=
-DROCM_DIR=
-Dhipblaslt_path=
# BUILD_DIR: Custom variable used by rocBLAS clients to locate bundled dependencies
# Set to rocBLAS's build directory so it can find deps/ subdirectory
"-DBUILD_DIR=${CMAKE_CURRENT_BINARY_DIR}/rocBLAS/build"
-DBUILD_WITH_TENSILE=ON
-DBUILD_WITH_HIPBLASLT=ON
# TODO: With `Tensile_TEST_LOCAL_PATH` set, the resulting build path is ${Tensile_TEST_LOCAL_PATH}/build.
Expand All @@ -192,7 +197,7 @@ therock_cmake_subproject_declare(rocBLAS
-DTENSILE_VERSION=
-DBUILD_CLIENTS_TESTS=${THEROCK_BUILD_TESTING}
-DBUILD_CLIENTS_BENCHMARKS=${THEROCK_BUILD_TESTING}
-DLINK_BLIS=OFF
-DLINK_BLIS=${THEROCK_BUILD_TESTING} # Links AOCL-BLAS (libaocl.a) for CPU BLAS
CMAKE_INCLUDES
therock_explicit_finders.cmake
COMPILER_TOOLCHAIN
Expand Down
5 changes: 4 additions & 1 deletion third-party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@ add_subdirectory(frugally-deep)
# spdlog depends on fmt.
add_subdirectory(spdlog)

# Host math libraries.
# Host math libraries (CPU reference BLAS for testing).
if(THEROCK_ENABLE_HOST_BLAS)
add_subdirectory(host-blas)
endif()
if(THEROCK_ENABLE_HOST_AOCL_BLAS)
add_subdirectory(aocl) # AOCL-BLAS: Alternative to OpenBLAS
endif()
if(THEROCK_ENABLE_HOST_SUITE_SPARSE)
add_subdirectory(SuiteSparse)
endif()
Expand Down
Loading
Loading