# Review notes (free-text preamble ahead of the first `diff --git` header; not part of any hunk).
#
# This diff adds three new files under easybuild/easyconfigs/t/Triton/:
#
# 1. Triton-3.1.0-foss-2024a-CUDA-12.6.0.eb
#    PythonBundle easyconfig. It declares an `LLVM` component (llvm-project at `_clang_commit`,
#    CMakeNinja easyblock, srcdir 'llvm', 'install' step skipped) and installs Triton from commit
#    `_commit` as the single extension. `_tr_preinstallopts` exports PYBIND11_SYSPATH,
#    JSON_SYSPATH, PATH, LLVM_INCLUDE_DIRS, LLVM_LIBRARY_DIR and LLVM_SYSPATH (all LLVM paths
#    point at %(builddir)s/easybuild_obj), TRITON_BUILD_WITH_CLANG_LLD=false and
#    TRITON_HOME=%(builddir)s ahead of the install command. The post-install command deletes the
#    bundled triton/backends/nvidia/lib/libdevice.10.bc, and `modextravars` sets
#    TRITON_PTXAS_PATH and TRITON_LIBDEVICE_PATH to locations below $CUDA_HOME.
#    NOTE(review): `_tr_start_dir = 'python'` is assigned, but the extension options use the
#    literal 'start_dir': 'python'; one of the two looks redundant - confirm.
#    NOTE(review): `github_account = 'openai'` is set while both source URLs are spelled out in
#    full (triton-lang/triton, llvm/llvm-project); presumably the parameter is unused - confirm.
#
# 2. Triton-3.1.0_5fe38ff_CUDA-12.6_ptx.patch
#    In third_party/nvidia/backend/compiler.py, adds an explicit branch for CUDA 12.x: minor < 6
#    returns 80 + minor and minor == 6 returns 85. The RuntimeError message additionally reports
#    the CUDA version string that was passed in.
#    NOTE(review): the pre-existing `return 80 + minor` stays in place as hunk context after the
#    new branch, so CUDA 12 minors above 6 would still return 80 + minor - confirm this is intended.
#
# 3. Triton-3.1.0_5fe38ff_eb_env_python_build.patch
#    - CMakeLists.txt: comments out MLIRAMDGPUDialect, LLVMAMDGPUCodeGen and LLVMAMDGPUAsmParser.
#    - python/setup.py: passes '' instead of the download URL for the pybind11 and json packages,
#      lets get_triton_cache_path() prefer $TRITON_HOME over HOME/USERPROFILE/HOMEPATH, comments
#      out the open_url() download-and-extract code, no longer passes -DLLVM_ENABLE_WERROR=ON,
#      adds -DFETCHCONTENT_FULLY_DISCONNECTED=1, and comments out the six download_and_copy()
#      calls (ptxas, cuobjdump, nvdisasm, cudacrt, cudart, cupti).
#    - unittest/CMakeLists.txt: replaces the googletest.cmake include with
#      find_package(GTest REQUIRED) plus include_directories(${GTEST_INCLUDE_DIR}).
#    NOTE(review): the patch header says AMD GPU support is disabled, yet the
#    `BackendInstaller.copy(["nvidia", "amd"])` line appears only as unchanged context - confirm.
#
# NOTE(review): whitespace in this copy of the diff looks collapsed (several original lines share
# one physical line), so it is presumably not applicable as-is; the text below is left untouched.
diff --git a/easybuild/easyconfigs/t/Triton/Triton-3.1.0-foss-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/t/Triton/Triton-3.1.0-foss-2024a-CUDA-12.6.0.eb new file mode 100644 index 00000000000..696e5d55c73 --- /dev/null +++ b/easybuild/easyconfigs/t/Triton/Triton-3.1.0-foss-2024a-CUDA-12.6.0.eb @@ -0,0 +1,115 @@ +# Update 3.1.0: Thomas Hoffmann, EMBL Heidelberg, structures-it@embl.de, 2024/12 + +easyblock = 'PythonBundle' + +name = 'Triton' + +version = '3.1.0' +versionsuffix = '-CUDA-%(cudaver)s' +# There is no 3.1 in pypi and no 3.1-tag at github. However, 5fe38ffd is version bump 3.1 in the release_3.1.x branch: +_commit = '5fe38ffd73c2ac6ed6323b554205186696631c6f' +_clang_commit = '10dc3a8e916d73291269e5e2b82dd22681489aa1' # acc. to cmake/llvm-hash.txt; 2024/05/23 + +homepage = 'https://triton-lang.org/' + +description = """Triton is a language and compiler for parallel programming. It aims to provide a +Python-based programming environment for productively writing custom DNN compute +kernels capable of running at maximal throughput on modern GPU hardware.""" + +toolchain = {'name': 'foss', 'version': '2024a'} + +github_account = 'openai' + +builddependencies = [ + ('CMake', '3.29.3'), + ('Ninja', '1.12.1'), + ('pybind11', '2.13.6'), + ('poetry', '1.8.3'), + ('nlohmann_json', '3.11.3'), + ('googletest', '1.15.2'), +] + +dependencies = [ + ('CUDA', '12.6.0', '', SYSTEM), + ('Python', '3.12.3'), + ('Z3', '4.13.0'), + ('Python-bundle-PyPI', '2024.06'), +] + +_llvm_confopts = [ + # acc. 
to: + # https://github.com/triton-lang/triton?tab=readme-ov-file#building-with-a-custom-llvm + '-DLLVM_ENABLE_ASSERTIONS=ON', + '-DLLVM_ENABLE_PROJECTS="mlir;llvm"', + '-DLLVM_TARGETS_TO_BUILD="X86;NVPTX"', +] + +components = [ + ('LLVM', _clang_commit, { + 'easyblock': 'CMakeNinja', + 'source_urls': ['https://github.com/llvm/llvm-project/archive/'], + 'sources': [{ + 'download_filename': '%(version)s.tar.gz', + 'filename': 'llvm-project-%(version)s.tar.gz', + }], + 'checksums': [ + {'llvm-project-10dc3a8e916d73291269e5e2b82dd22681489aa1.tar.gz': + '6ee5e0f9a49d41b5f48ebc4613ce3371f686bf70fcece9f849aba3c37bdeb3e8'}, + ], + 'start_dir': 'llvm-project-%(version)s', + 'configopts': ' '.join(_llvm_confopts), + 'srcdir': 'llvm', + 'skipsteps': ['install'] + }) +] + +_tr_start_dir = 'python' + +_tr_preinstallopts = 'export PYBIND11_SYSPATH=$EBROOTPYBIND11 && ' +_tr_preinstallopts += 'export JSON_SYSPATH=$EBROOTNLOHMANN_JSON && ' +# use LLVM component in builddir: +_tr_preinstallopts += 'export PATH=%(builddir)s/easybuild_obj/bin:$PATH && ' +_tr_preinstallopts += 'export LLVM_INCLUDE_DIRS=%(builddir)s/easybuild_obj/include && ' +_tr_preinstallopts += 'export LLVM_LIBRARY_DIR=%(builddir)s/easybuild_obj/lib && ' +_tr_preinstallopts += 'export LLVM_SYSPATH=%(builddir)s/easybuild_obj/ && ' + +_tr_preinstallopts += 'export TRITON_BUILD_WITH_CLANG_LLD=false && ' +_tr_preinstallopts += 'export TRITON_HOME=%(builddir)s && ' + +_tr_installopts = "-v " + +exts_list = [ + (name, version, { + 'installopts': _tr_installopts, + 'patches': [ + 'Triton-3.1.0_5fe38ff_eb_env_python_build.patch', + 'Triton-3.1.0_5fe38ff_CUDA-12.6_ptx.patch', + ], + # ensure that libdevice.10.bc from $EBROOTCUDA/nvvm/libdevice is used: + 'postinstallcmds': [ + 'rm -rf %(installdir)s/lib/python%(pyshortver)s/site-packages/triton/backends/nvidia/lib/libdevice.10.bc' + ], + 'preinstallopts': _tr_preinstallopts, + 'source_urls': ['https://github.com/triton-lang/triton/archive/'], + 'sources': [{ + 'filename': 
'v%%(version)s-%s.tar.gz' % _commit, + 'download_filename': '%s.tar.gz' % _commit}], + 'start_dir': 'python', + 'checksums': [ + {'v3.1.0-5fe38ffd73c2ac6ed6323b554205186696631c6f.tar.gz': + '933babc32b69872efbce05fe8be61129fecf52c724fadea42d8c7b2d10e16ad9'}, + {'Triton-3.1.0_5fe38ff_eb_env_python_build.patch': + '6b46064b4892c7df340b6afd7ffb4abb2ea4486df9406626cd9b2c92a748705d'}, + {'Triton-3.1.0_5fe38ff_CUDA-12.6_ptx.patch': + '2be8609141375ee381364ef74d74c12af598fc0b06357689c9f32d9f2514eff4'}, + ], + }), +] + +modextravars = { + 'TRITON_PTXAS_PATH': '$CUDA_HOME/bin/ptxas', + # ensure that libdevice.10.bc from $EBROOTCUDA/nvvm/libdevice is used: + 'TRITON_LIBDEVICE_PATH': '$CUDA_HOME/nvvm/libdevice/libdevice.10.bc' +} + +moduleclass = 'devel' diff --git a/easybuild/easyconfigs/t/Triton/Triton-3.1.0_5fe38ff_CUDA-12.6_ptx.patch b/easybuild/easyconfigs/t/Triton/Triton-3.1.0_5fe38ff_CUDA-12.6_ptx.patch new file mode 100644 index 00000000000..d50a4dfa6b6 --- /dev/null +++ b/easybuild/easyconfigs/t/Triton/Triton-3.1.0_5fe38ff_CUDA-12.6_ptx.patch @@ -0,0 +1,28 @@ +# Thomas Hoffmann, EMBL Heidelberg, structures-it@embl.de, 2025/01 +# acc. 
to https://github.com/bertmaher/triton/commit/fd1709c5674b86ec277fecf55f2421e30f26b2aa : +# [CODEGEN] Support CUDA 12.6 (triton-lang#4588) +# According to the +# [table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history), +# both CUDA 12.5 and 12.6 use PTX ISA 8.5 + +diff -ru triton/third_party/nvidia/backend/compiler.py triton-3.1.0_CUDA_12.6_ptx/third_party/nvidia/backend/compiler.py +--- triton/third_party/nvidia/backend/compiler.py 2024-11-29 08:38:26.000000000 +0100 ++++ triton-3.1.0_CUDA_12.6_ptx/third_party/nvidia/backend/compiler.py 2025-01-20 15:13:23.114632925 +0100 +@@ -44,12 +44,16 @@ + assert isinstance(cuda_version, str) + major, minor = map(int, cuda_version.split('.')) + if major == 12: ++ if minor < 6: ++ return 80 + minor ++ elif minor == 6: ++ return 85 + return 80 + minor + if major == 11: + return 70 + minor + if major == 10: + return 63 + minor +- raise RuntimeError("Triton only support CUDA 10.0 or higher") ++ raise RuntimeError("Triton only support CUDA 10.0 or higher, but got CUDA version: " + cuda_version) + + + @functools.lru_cache(None) diff --git a/easybuild/easyconfigs/t/Triton/Triton-3.1.0_5fe38ff_eb_env_python_build.patch b/easybuild/easyconfigs/t/Triton/Triton-3.1.0_5fe38ff_eb_env_python_build.patch new file mode 100644 index 00000000000..57072cdbd92 --- /dev/null +++ b/easybuild/easyconfigs/t/Triton/Triton-3.1.0_5fe38ff_eb_env_python_build.patch @@ -0,0 +1,221 @@ +# Thomas Hoffmann, EMBL Heidelberg, structures-it@embl.de, 2024/12 +# (adapted from Triton-2.1.0-use_eb_env_python_build.patch and Triton-2.1.0-disable_rocm_support.patch) +# - disable support for AMD GPUs +# - disable all downloads at build time. 
+ +diff -ru triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/CMakeLists.txt triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/CMakeLists.txt +--- triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/CMakeLists.txt 2024-09-10 23:44:54.000000000 +0200 ++++ triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/CMakeLists.txt 2024-12-19 11:17:38.959269261 +0100 +@@ -184,7 +184,7 @@ + ${triton_plugins} + + # mlir +- MLIRAMDGPUDialect ++ #MLIRAMDGPUDialect + MLIRNVVMDialect + MLIRNVVMToLLVMIRTranslation + MLIRGPUToNVVMTransforms +@@ -208,9 +208,9 @@ + # LLVM + LLVMPasses + LLVMNVPTXCodeGen +- # LLVMNVPTXAsmPrinter +- LLVMAMDGPUCodeGen +- LLVMAMDGPUAsmParser ++ #LLVMNVPTXAsmPrinter ++ #LLVMAMDGPUCodeGen ++ #LLVMAMDGPUAsmParser + + ) + if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR # Linux arm64 +diff -ru triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/python/setup.py triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/python/setup.py +--- triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/python/setup.py 2024-09-10 23:44:54.000000000 +0200 ++++ triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/python/setup.py 2024-12-19 12:36:55.358107277 +0100 +@@ -130,13 +130,15 @@ + version = pybind11_version_file.read().strip() + name = f"pybind11-{version}" + url = f"https://github.com/pybind/pybind11/archive/refs/tags/v{version}.tar.gz" +- return Package("pybind11", name, url, "PYBIND11_INCLUDE_DIR", "", "PYBIND11_SYSPATH") ++ #return Package("pybind11", name, url, "PYBIND11_INCLUDE_DIR", "", "PYBIND11_SYSPATH") ++ return Package("pybind11", name, '', "PYBIND11_INCLUDE_DIR", "", "PYBIND11_SYSPATH") + + + # json + def get_json_package_info(): + url = "https://github.com/nlohmann/json/releases/download/v3.11.3/include.zip" +- return Package("json", "", url, "JSON_INCLUDE_DIR", "", "JSON_SYSPATH") ++ #return Package("json", "", url, "JSON_INCLUDE_DIR", "", "JSON_SYSPATH") ++ return Package("json", "", '', "JSON_INCLUDE_DIR", "", 
"JSON_SYSPATH") + + + # llvm +@@ -201,7 +203,9 @@ + + + def get_triton_cache_path(): +- user_home = os.getenv("HOME") or os.getenv("USERPROFILE") or os.getenv("HOMEPATH") or None ++ user_home = os.getenv("TRITON_HOME") ++ if not user_home: ++ user_home = os.getenv("HOME") or os.getenv("USERPROFILE") or os.getenv("HOMEPATH") or None + if not user_home: + raise RuntimeError("Could not find user home directory") + return os.path.join(user_home, ".triton") +@@ -222,14 +226,14 @@ + shutil.rmtree(package_root_dir) + os.makedirs(package_root_dir, exist_ok=True) + print(f'downloading and extracting {p.url} ...') +- with open_url(p.url) as response: +- if p.url.endswith(".zip"): +- file_bytes = BytesIO(response.read()) +- with zipfile.ZipFile(file_bytes, "r") as file: +- file.extractall(path=package_root_dir) +- else: +- with tarfile.open(fileobj=response, mode="r|*") as file: +- file.extractall(path=package_root_dir) ++ #with open_url(p.url) as response: ++ # if p.url.endswith(".zip"): ++ # file_bytes = BytesIO(response.read()) ++ # with zipfile.ZipFile(file_bytes, "r") as file: ++ # file.extractall(path=package_root_dir) ++ # else: ++ # with tarfile.open(fileobj=response, mode="r|*") as file: ++ # file.extractall(path=package_root_dir) + # write version url to package_dir + with open(os.path.join(package_dir, "version.txt"), "w") as f: + f.write(p.url) +@@ -363,12 +367,14 @@ + "-G", "Ninja", # Ninja is much faster than make + "-DCMAKE_MAKE_PROGRAM=" + + ninja_dir, # Pass explicit path to ninja otherwise cmake may cache a temporary path +- "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", "-DLLVM_ENABLE_WERROR=ON", ++ "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", ++ # "-DLLVM_ENABLE_WERROR=ON", + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, "-DTRITON_BUILD_TUTORIALS=OFF", + "-DTRITON_BUILD_PYTHON_MODULE=ON", "-DPython3_EXECUTABLE:FILEPATH=" + sys.executable, + "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON", "-DPYTHON_INCLUDE_DIRS=" + python_include_dir, + "-DTRITON_CODEGEN_BACKENDS=" + ';'.join([b.name 
for b in backends if not b.is_external]), +- "-DTRITON_PLUGIN_DIRS=" + ';'.join([b.src_dir for b in backends if b.is_external]) ++ "-DTRITON_PLUGIN_DIRS=" + ';'.join([b.src_dir for b in backends if b.is_external]), ++ "-DFETCHCONTENT_FULLY_DISCONNECTED=1", + ] + if lit_dir is not None: + cmake_args.append("-DLLVM_EXTERNAL_LIT=" + lit_dir) +@@ -432,54 +438,54 @@ + with open(nvidia_version_path, "r") as nvidia_version_file: + NVIDIA_TOOLCHAIN_VERSION = nvidia_version_file.read().strip() + +-download_and_copy( +- name="ptxas", +- src_path="bin/ptxas", +- variable="TRITON_PTXAS_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="cuobjdump", +- src_path="bin/cuobjdump", +- variable="TRITON_CUOBJDUMP_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-cuobjdump/{version}/download/linux-{arch}/cuda-cuobjdump-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="nvdisasm", +- src_path="bin/nvdisasm", +- variable="TRITON_NVDISASM_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-nvdisasm/{version}/download/linux-{arch}/cuda-nvdisasm-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="cudacrt", +- src_path="include", +- variable="TRITON_CUDACRT_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="cudart", +- src_path="include", +- variable="TRITON_CUDART_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-cudart-dev/{version}/download/linux-{arch}/cuda-cudart-dev-{version}-0.tar.bz2", +-) +-download_and_copy( +- name="cupti", +- src_path="include", +- 
variable="TRITON_CUPTI_PATH", +- version=NVIDIA_TOOLCHAIN_VERSION, +- url_func=lambda arch, version: +- f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/linux-{arch}/cuda-cupti-{version}-0.tar.bz2", +-) ++#download_and_copy( ++# name="ptxas", ++# src_path="bin/ptxas", ++# variable="TRITON_PTXAS_PATH", ++# version=NVIDIA_TOOLCHAIN_VERSION, ++# url_func=lambda arch, version: ++# f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2", ++#) ++#download_and_copy( ++# name="cuobjdump", ++# src_path="bin/cuobjdump", ++# variable="TRITON_CUOBJDUMP_PATH", ++# version=NVIDIA_TOOLCHAIN_VERSION, ++# url_func=lambda arch, version: ++# f"https://anaconda.org/nvidia/cuda-cuobjdump/{version}/download/linux-{arch}/cuda-cuobjdump-{version}-0.tar.bz2", ++#) ++#download_and_copy( ++# name="nvdisasm", ++# src_path="bin/nvdisasm", ++# variable="TRITON_NVDISASM_PATH", ++# version=NVIDIA_TOOLCHAIN_VERSION, ++# url_func=lambda arch, version: ++# f"https://anaconda.org/nvidia/cuda-nvdisasm/{version}/download/linux-{arch}/cuda-nvdisasm-{version}-0.tar.bz2", ++#) ++#download_and_copy( ++# name="cudacrt", ++# src_path="include", ++# variable="TRITON_CUDACRT_PATH", ++# version=NVIDIA_TOOLCHAIN_VERSION, ++# url_func=lambda arch, version: ++# f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2", ++#) ++#download_and_copy( ++# name="cudart", ++# src_path="include", ++# variable="TRITON_CUDART_PATH", ++# version=NVIDIA_TOOLCHAIN_VERSION, ++# url_func=lambda arch, version: ++# f"https://anaconda.org/nvidia/cuda-cudart-dev/{version}/download/linux-{arch}/cuda-cudart-dev-{version}-0.tar.bz2", ++#) ++#download_and_copy( ++# name="cupti", ++# src_path="include", ++# variable="TRITON_CUPTI_PATH", ++# version=NVIDIA_TOOLCHAIN_VERSION, ++# url_func=lambda arch, version: ++# f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/linux-{arch}/cuda-cupti-{version}-0.tar.bz2", ++#) + + backends = 
[*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()] + +diff -ru triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/unittest/CMakeLists.txt triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/unittest/CMakeLists.txt +--- triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/unittest/CMakeLists.txt 2024-09-10 23:44:54.000000000 +0200 ++++ triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/unittest/CMakeLists.txt 2024-12-19 13:43:42.815629305 +0100 +@@ -1,8 +1,11 @@ +-include (${CMAKE_CURRENT_SOURCE_DIR}/googletest.cmake) ++#include (${CMAKE_CURRENT_SOURCE_DIR}/googletest.cmake) + + include(GoogleTest) + enable_testing() + ++find_package(GTest REQUIRED) ++include_directories(${GTEST_INCLUDE_DIR}) ++ + get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) + get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) + get_property(triton_libs GLOBAL PROPERTY TRITON_LIBS)