Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# EasyBuild easyconfig for the nvidia-cutlass Python package (CUDA variant),
# installed as a bundle together with its treelib dependency.
easyblock = 'PythonBundle'

name = 'nvidia-cutlass'
version = '3.8.0.0'
# The suffix follows the version of the CUDA dependency listed below
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://pypi.org/project/nvidia-cutlass'
description = """
CUTLASS is a collection of CUDA C++ template abstractions for implementing high-performance
matrix-matrix multiplication (GEMM) and related computations at all levels and scales within CUDA.
It incorporates strategies for hierarchical decomposition and data movement similar to those used
to implement cuBLAS and cuDNN.
CUTLASS decomposes these "moving parts" into reusable, modular software components abstracted by C++ template classes.
Primitives for different levels of a conceptual parallelization hierarchy can be specialized and tuned
via custom tiling sizes, data types, and other algorithmic policy.
The resulting flexibility simplifies their use as building blocks within custom kernels and applications.
"""

toolchain = {'name': 'gfbf', 'version': '2024a'}

builddependencies = [
    ('poetry', '1.8.3'),
]

dependencies = [
    ('CUDA', '12.6.0', '', SYSTEM),
    ('CUDA-Python', '12.6.2.post1', versionsuffix),
    ('Python', '3.12.3'),
    ('Python-bundle-PyPI', '2024.06'),
    ('SciPy-bundle', '2024.05'),
    ('networkx', '3.4.2'),
    ('pydot', '3.0.3'),
]

exts_list = [
    ('treelib', '1.8.0', {
        'sources': [SOURCE_TAR_GZ],
        'checksums': ['e1be2c6b66ffbfae85079fc4c76fb4909946d01d915ee29ff6795de53aed5d55'],
    }),
    (name, version, {
        # The package is imported as 'cutlass', not under its distribution name
        'modulename': 'cutlass',
        # Installed from the wheel, so the patch is applied after installation,
        # relative to the installed 'cutlass' package directory
        'source_tmpl': 'nvidia_cutlass-%(version)s-py3-none-any.whl',
        'post_install_patches': [{
            'name': 'nvidia-cutlass-3.8.0.0_fix-BytesWarning.patch',
            'sourcepath': 'lib/python%(pyshortver)s/site-packages/cutlass',
            'level': 3,
        }],
        'checksums': [
            '013147221a63500205da233ae02e6262463917f3fe39cb09efbca37bfd1c39f9',
            {'nvidia-cutlass-3.8.0.0_fix-BytesWarning.patch':
             '63eb47894340c0ea03d0d2faaa49c1979915f903b5bc2ced17f8e0dd5ab854ed'},
        ],
    }),
]

# Statements of the smoke test that is executed with 'python -bb';
# e.g. nvcc_version() was incompatible with -bb.
# $EBROOTCUDA is expanded by the shell because the whole program is passed in double quotes.
local_smoke_test = '; '.join([
    'import cutlass',
    "assert cutlass.nvcc_version().startswith('%(cudamajver)s')",
    "assert cutlass.cuda_install_path() == '$EBROOTCUDA'",
])

sanity_check_commands = [
    'python -sc "import cutlass_library"',
    'python -bb -sc "%s"' % local_smoke_test,
]

moduleclass = 'lib'
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
When neither `text` nor `encoding` is specified to `subprocess.run` then `stdout` will be of type `bytes`.
A subsequent `str(stdout)` causes a `BytesWarning` which might result in errors, e.g. if `python -bb` is used.
See https://github.com/NVIDIA/cutlass/pull/2682

Fixes hard failures in PyTorch tests that do use `-bb`.

Author: Alexander Grund (TU Dresden)

diff --git a/python/cutlass_cppgen/__init__.py b/python/cutlass_cppgen/__init__.py
index 9bdd259c02..0e28ff55fd 100644
--- a/python/cutlass_cppgen/__init__.py
+++ b/python/cutlass_cppgen/__init__.py
@@ -39,11 +39,11 @@
def _cuda_install_path_from_nvcc() -> str:
import subprocess
# Attempt to detect CUDA_INSTALL_PATH based on location of NVCC
- result = subprocess.run(['/usr/bin/which', 'nvcc'], capture_output=True)
+ result = subprocess.run(['/usr/bin/which', 'nvcc'], capture_output=True, text=True)
if result.returncode != 0:
raise Exception(f'Unable to find nvcc via `which` utility.')

- cuda_install_path = result.stdout.decode('utf-8').split('/bin/nvcc')[0]
+ cuda_install_path = result.stdout.split('/bin/nvcc')[0]
if not os.path.isdir(cuda_install_path):
raise Exception(f'Environment variable "CUDA_INSTALL_PATH" is not defined, '
f'and default path of {cuda_install_path} does not exist.')
@@ -63,10 +63,10 @@ def nvcc_version():
import subprocess

# Attempt to get NVCC version
- result = subprocess.run(['nvcc', '--version'], capture_output=True)
+ result = subprocess.run(['nvcc', '--version'], capture_output=True, text=True)
if result.returncode != 0:
raise Exception('Unable to run `nvcc --version')
- _NVCC_VERSION = str(result.stdout).split(" release ")[-1].split(",")[0]
+ _NVCC_VERSION = result.stdout.split(" release ")[-1].split(",")[0]
return _NVCC_VERSION

_CUDA_INSTALL_PATH = None