From b4da946f7996124fb80c206a2a6d51d9aaeb9b6b Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 10 Mar 2026 14:23:10 +0100 Subject: [PATCH 1/4] Sort values for cuda_compute_capabilities templates There have been instances where the order matters so sort the values. Even if it doesn't solve some issues it will at least be consistent. --- easybuild/framework/easyblock.py | 14 +++---- easybuild/framework/easyconfig/templates.py | 3 ++ test/framework/easyconfig.py | 42 ++++++++++----------- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py index 927ca2f4da..c0fff171ec 100644 --- a/easybuild/framework/easyblock.py +++ b/easybuild/framework/easyblock.py @@ -3041,14 +3041,13 @@ def prepare_step(self, start_dir=True, load_tc_deps_modules=True): # Set CUDA compute capabilities from default value in nvidia-compiler/NVHPC toolchains if get_software_root('nvidia-compilers'): - cuda_cc_cfg = self.cfg.get('cuda_compute_capabilities') - cuda_cc_opt = build_option('cuda_compute_capabilities') + cuda_cc = self.cfg.get_cuda_cc_template_value('cuda_compute_capabilities', required=False) cuda_cc_nvhpc = os.getenv('EBNVHPCCUDACC', None) - if not cuda_cc_cfg and not cuda_cc_opt and cuda_cc_nvhpc: + if not cuda_cc and cuda_cc_nvhpc: self.cfg['cuda_compute_capabilities'] = cuda_cc_nvhpc.split(',') self.log.info( "Updated empty 'cuda_compute_capabilities' option with default CUDA compute capability " - f"defined in nvidia-compilers: {self.cfg['cuda_compute_capabilities']}" + f"defined in nvidia-compilers: {cuda_cc_nvhpc}" ) # guess directory to start configure/build/install process in, and move there @@ -3460,7 +3459,7 @@ def sanity_check_cuda(self, cuda_dirs=None): self.log.info("Checking binaries/libraries for CUDA device code...") fail_msgs = [] - cfg_ccs = build_option('cuda_compute_capabilities') or self.cfg.get('cuda_compute_capabilities', None) + cfg_ccs = self.cfg.get_cuda_cc_template_value('cuda_cc_space_sep', required=False).split() ignore_failures = not build_option('cuda_sanity_check_error_on_failed_checks') strict_cc_check = build_option('cuda_sanity_check_strict') accept_ptx_as_devcode = build_option('cuda_sanity_check_accept_ptx_as_devcode') @@ -3471,7 +3470,7 @@ def sanity_check_cuda(self, cuda_dirs=None): ignore_file_list = [os.path.join(self.installdir, d) for d in self.cfg['cuda_sanity_ignore_files']] # If there are no CUDA compute capabilities defined, return - if cfg_ccs is None or len(cfg_ccs) == 0: + if not cfg_ccs: self.log.info("Skipping CUDA sanity check, as no CUDA compute capabilities were configured") return fail_msgs @@ -3653,8 +3652,7 @@ def format_file_list(files_list): self.log.warning(fail_msg) # Check whether there is ptx code for the highest CC in cfg_ccs - # Make sure to use LooseVersion so that e.g. 9.0 < 9.0a < 9.2 < 9.10 - highest_cc = [sorted(cfg_ccs, key=LooseVersion)[-1]] + highest_cc = [cfg_ccs[-1]] # Template is already sorted missing_ptx_ccs = list(set(highest_cc) - set(found_ptx_ccs)) if missing_ptx_ccs: diff --git a/easybuild/framework/easyconfig/templates.py b/easybuild/framework/easyconfig/templates.py index 8acf7a57ee..59f9f2b033 100644 --- a/easybuild/framework/easyconfig/templates.py +++ b/easybuild/framework/easyconfig/templates.py @@ -38,6 +38,7 @@ import re from easybuild.base import fancylogger +from easybuild.tools import LooseVersion from easybuild.tools.build_log import EasyBuildError from easybuild.tools.config import build_option from easybuild.tools.systemtools import get_shared_lib_ext, pick_dep_version @@ -500,6 +501,8 @@ def template_constant_dict(config, ignore=None, toolchain=None): # Use the commandline / easybuild config option if given, else use the value from the EC (as a default) cuda_cc = build_option('cuda_compute_capabilities') or config.get('cuda_compute_capabilities') if cuda_cc: + # Sort ascending for uniform behavior also avoiding e.g. cudaErrorInvalidDeviceFunction in some situations + cuda_cc = sorted(cuda_cc, key=LooseVersion) template_values['cuda_compute_capabilities'] = ','.join(cuda_cc) template_values['cuda_cc_space_sep'] = ' '.join(cuda_cc) template_values['cuda_cc_space_sep_no_period'] = ' '.join(cc.replace('.', '') for cc in cuda_cc) diff --git a/test/framework/easyconfig.py b/test/framework/easyconfig.py index 81128325f6..0e441c5a48 100644 --- a/test/framework/easyconfig.py +++ b/test/framework/easyconfig.py @@ -4833,7 +4833,7 @@ def test_cuda_compute_capabilities(self): homepage = 'https://example.com' description = 'test' toolchain = SYSTEM - cuda_compute_capabilities = ['5.1', '7.0', '7.1'] + cuda_compute_capabilities = ['5.1', '7.1', '7.0'] # Unordered to test sorting preconfigopts = 'CUDAARCHS="%(cuda_cc_cmake)s"' configopts = 'comma="%(cuda_sm_comma_sep)s" space="%(cuda_sm_space_sep)s"' prebuildopts = '%(cuda_cc_semicolon_sep)s' @@ -4856,17 +4856,17 @@ def test_cuda_compute_capabilities(self): self.assertEqual(ec['installopts'], '5.1,7.0,7.1') # build options overwrite it - init_config(build_options={'cuda_compute_capabilities': ['4.2', '6.3']}) + init_config(build_options={'cuda_compute_capabilities': ['12.1', '4.2']}) ec = EasyConfig(self.eb_file) - self.assertEqual(ec['preconfigopts'], 'CUDAARCHS="42;63"') - self.assertEqual(ec['configopts'], 'comma="sm_42,sm_63" ' - 'space="sm_42 sm_63"') - self.assertEqual(ec['buildopts'], 'comma="42,63" ' - 'space="42 63" ' - 'semi="42;63"') - self.assertEqual(ec['prebuildopts'], '4.2;6.3') - self.assertEqual(ec['preinstallopts'], 'period="4.2 6.3" noperiod="42 63"') - self.assertEqual(ec['installopts'], '4.2,6.3') + self.assertEqual(ec['preconfigopts'], 'CUDAARCHS="42;121"') + self.assertEqual(ec['configopts'], 'comma="sm_42,sm_121" ' + 'space="sm_42 sm_121"') + self.assertEqual(ec['buildopts'], 'comma="42,121" ' + 'space="42 121" ' + 'semi="42;121"') + self.assertEqual(ec['prebuildopts'], '4.2;12.1') + self.assertEqual(ec['preinstallopts'], 'period="4.2 12.1" noperiod="42 121"') + self.assertEqual(ec['installopts'], '4.2,12.1') def test_amdgcn_capabilities(self): self.contents = textwrap.dedent(""" @@ -5169,20 +5169,20 @@ def test_get_cuda_cc_template_value(self): error_pattern += r"Make sure that either the --cuda-compute-capabilities EasyBuild configuration " error_pattern += "option is set, or that the cuda_compute_capabilities easyconfig parameter is defined." cuda_template_values = { - 'cuda_compute_capabilities': '6.5,7.0', - 'cuda_cc_space_sep': '6.5 7.0', - 'cuda_cc_semicolon_sep': '6.5;7.0', - 'cuda_int_comma_sep': '65,70', - 'cuda_int_space_sep': '65 70', - 'cuda_int_semicolon_sep': '65;70', - 'cuda_sm_comma_sep': 'sm_65,sm_70', - 'cuda_sm_space_sep': 'sm_65 sm_70', + 'cuda_compute_capabilities': '6.5,12.0', + 'cuda_cc_space_sep': '6.5 12.0', + 'cuda_cc_semicolon_sep': '6.5;12.0', + 'cuda_int_comma_sep': '65,120', + 'cuda_int_space_sep': '65 120', + 'cuda_int_semicolon_sep': '65;120', + 'cuda_sm_comma_sep': 'sm_65,sm_120', + 'cuda_sm_space_sep': 'sm_65 sm_120', } for key in cuda_template_values: self.assertErrorRegex(EasyBuildError, error_pattern % key, ec.get_cuda_cc_template_value, key) self.assertEqual(ec.get_cuda_cc_template_value(key, required=False), '') - update_build_option('cuda_compute_capabilities', ['6.5', '7.0']) + update_build_option('cuda_compute_capabilities', ['6.5', '12.0']) ec = EasyConfig(self.eb_file) for key, expected in cuda_template_values.items(): @@ -5194,7 +5194,7 @@ def test_get_cuda_cc_template_value(self): for key in cuda_template_values: self.assertErrorRegex(EasyBuildError, error_pattern % key, ec.get_cuda_cc_template_value, key) - self.contents += "\ncuda_compute_capabilities = ['6.5', '7.0']" + self.contents += "\ncuda_compute_capabilities = ['12.0', '6.5']" self.prep() ec = EasyConfig(self.eb_file) From e72cc62df1fa8b91490d2aa0cc93de6ff5a50950 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 10 Mar 2026 16:02:35 +0100 Subject: [PATCH 2/4] Update docu of templates --- easybuild/framework/easyconfig/easyconfig.py | 1 + easybuild/framework/easyconfig/templates.py | 22 ++++++++++---------- test/framework/options.py | 4 ++-- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/easybuild/framework/easyconfig/easyconfig.py b/easybuild/framework/easyconfig/easyconfig.py index 82cedaf0f3..6ec6ed271b 100644 --- a/easybuild/framework/easyconfig/easyconfig.py +++ b/easybuild/framework/easyconfig/easyconfig.py @@ -1988,6 +1988,7 @@ def get_cuda_cc_template_value(self, key, required=True): and cuda_compute_capabilities easyconfig parameter. Returns user-friendly error message in case neither are defined, or if an unknown key is used. + The individual values are sorted in ascending order. :param required: If False and the key is not found, return an empty string instead of raising an error. """ diff --git a/easybuild/framework/easyconfig/templates.py b/easybuild/framework/easyconfig/templates.py index 59f9f2b033..5924180619 100644 --- a/easybuild/framework/easyconfig/templates.py +++ b/easybuild/framework/easyconfig/templates.py @@ -101,20 +101,20 @@ "via amdgcn_capabilities easyconfig parameter", 'amdgcn_cc_space_sep': "Space-separated list of AMDGCN capabilities", 'amdgcn_cc_semicolon_sep': "Semicolon-separated list of AMDGCN capabilities", - 'cuda_compute_capabilities': "Comma-separated list of CUDA compute capabilities, as specified via " + 'cuda_compute_capabilities': "Comma-separated list of sorted CUDA compute capabilities, as specified via " "--cuda-compute-capabilities configuration option or " "via cuda_compute_capabilities easyconfig parameter", - 'cuda_cc_cmake': 'List of CUDA compute capabilities suitable for use with $CUDAARCHS in CMake 3.18+', - 'cuda_cc_nvhpc': 'List of CUDA compute capabilities suitable for use with -gpu option in NVHPC compilers', - 'cuda_cc_space_sep': 'Space-separated list of CUDA compute capabilities', + 'cuda_cc_cmake': 'List of sorted CUDA compute capabilities suitable for use with $CUDAARCHS in CMake 3.18+', + 'cuda_cc_nvhpc': 'List of sorted CUDA compute capabilities suitable for use with -gpu option in NVHPC compilers', + 'cuda_cc_space_sep': 'Space-separated list of sorted CUDA compute capabilities', 'cuda_cc_space_sep_no_period': - "Space-separated list of CUDA compute capabilities, without periods (e.g. '80 90').", - 'cuda_cc_semicolon_sep': 'Semicolon-separated list of CUDA compute capabilities', - 'cuda_int_comma_sep': 'Comma-separated list of integer CUDA compute capabilities', - 'cuda_int_space_sep': 'Space-separated list of integer CUDA compute capabilities', - 'cuda_int_semicolon_sep': 'Semicolon-separated list of integer CUDA compute capabilities', - 'cuda_sm_comma_sep': 'Comma-separated list of sm_* values that correspond with CUDA compute capabilities', - 'cuda_sm_space_sep': 'Space-separated list of sm_* values that correspond with CUDA compute capabilities', + "Space-separated list of sorted CUDA compute capabilities, without periods (e.g. '80 90').", + 'cuda_cc_semicolon_sep': 'Semicolon-separated list of sorted CUDA compute capabilities', + 'cuda_int_comma_sep': 'Comma-separated list of integer sorted CUDA compute capabilities', + 'cuda_int_space_sep': 'Space-separated list of integer sorted CUDA compute capabilities', + 'cuda_int_semicolon_sep': 'Semicolon-separated list of integer sorted CUDA compute capabilities', + 'cuda_sm_comma_sep': 'Comma-separated list of sm_* values that correspond with sorted CUDA compute capabilities', + 'cuda_sm_space_sep': 'Space-separated list of sm_* values that correspond with sorted CUDA compute capabilities', 'mpi_cmd_prefix': 'Prefix command for running MPI programs (with default number of ranks)', 'parallel': "Degree of parallelism for e.g. make", # can't be a boolean (True/False), must be a string value since it's a string template diff --git a/test/framework/options.py b/test/framework/options.py index 6f12703d40..dbcd456940 100644 --- a/test/framework/options.py +++ b/test/framework/options.py @@ -655,7 +655,7 @@ def run_test(fmt=None): r'^\* ``%\(name\)s``$', r'^``%\(namelower\)s``\s+lower case of value of name\s*$', r'^``%\(arch\)s``\s+System architecture \(e.g. x86_64, aarch64, ppc64le, ...\)\s*$', - r'^``%\(cuda_cc_space_sep\)s``\s+Space-separated list of CUDA compute capabilities\s*$', + r'^``%\(cuda_cc_space_sep\)s``\s+Space-separated list of sorted CUDA compute capabilities\s*$', r'^``SOURCE_TAR_GZ``\s+Source \.tar\.gz bundle\s+``%\(name\)s-%\(version\)s.tar.gz``\s*$', r'^``%\(software_commit\)s``\s+Git commit id to use for the software as specified ' 'by --software-commit command line option', @@ -669,7 +669,7 @@ def run_test(fmt=None): r'^\s+%\(name\)s$', r'^\s+%\(namelower\)s: lower case of value of name$', r'^\s+%\(arch\)s: System architecture \(e.g. x86_64, aarch64, ppc64le, ...\)$', - r'^\s+%\(cuda_cc_space_sep\)s: Space-separated list of CUDA compute capabilities$', + r'^\s+%\(cuda_cc_space_sep\)s: Space-separated list of sorted CUDA compute capabilities$', r'^\s+SOURCE_TAR_GZ: Source \.tar\.gz bundle \(%\(name\)s-%\(version\)s.tar.gz\)$', r'^\s+%\(software_commit\)s: Git commit id to use for the software as specified ' 'by --software-commit command line option', From 6340b7be6856de4fa4f4f3e6e6bfefd13a646c5f Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 9 Apr 2026 13:43:32 +0200 Subject: [PATCH 3/4] Deduplicate CUDA CCs in template --- easybuild/framework/easyconfig/templates.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/easybuild/framework/easyconfig/templates.py b/easybuild/framework/easyconfig/templates.py index 5924180619..7ae97f6872 100644 --- a/easybuild/framework/easyconfig/templates.py +++ b/easybuild/framework/easyconfig/templates.py @@ -501,8 +501,12 @@ def template_constant_dict(config, ignore=None, toolchain=None): # Use the commandline / easybuild config option if given, else use the value from the EC (as a default) cuda_cc = build_option('cuda_compute_capabilities') or config.get('cuda_compute_capabilities') if cuda_cc: - # Sort ascending for uniform behavior also avoiding e.g. cudaErrorInvalidDeviceFunction in some situations - cuda_cc = sorted(cuda_cc, key=LooseVersion) + if not isinstance(cuda_cc, (list, tuple, set)): + raise EasyBuildError("Invalid type of cuda_compute_capabilities(=%s): %s. Expected list, tuple or set.", + cuda_cc, type(cuda_cc).__name__) + # Deduplicate and sort values ascending + # for uniform behavior also avoiding e.g. cudaErrorInvalidDeviceFunction in some situations + cuda_cc = sorted(set(cuda_cc), key=LooseVersion) template_values['cuda_compute_capabilities'] = ','.join(cuda_cc) template_values['cuda_cc_space_sep'] = ' '.join(cuda_cc) template_values['cuda_cc_space_sep_no_period'] = ' '.join(cc.replace('.', '') for cc in cuda_cc) From c2436b8f5910620b83f12080a9c94849d9311b63 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 9 Apr 2026 13:44:10 +0200 Subject: [PATCH 4/4] Mention order-independentness in CUDA cc CLI doc --- easybuild/tools/options.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/easybuild/tools/options.py b/easybuild/tools/options.py index e55b08b459..11a3a7bffc 100644 --- a/easybuild/tools/options.py +++ b/easybuild/tools/options.py @@ -399,9 +399,10 @@ def override_options(self): 'cuda-cache-maxsize': ("Maximum size of the CUDA cache (in MiB) used for JIT compilation of PTX code. " "Leave value empty to let EasyBuild choose a value or '0' to disable the cache", int, 'store_or_None', None), - 'cuda-compute-capabilities': ("List of CUDA compute capabilities to use when building GPU software; " - "values should be specified as digits separated by a dot, " - "for example: 3.5,5.0,7.2. EasyBuild will (where possible) compile fat " + 'cuda-compute-capabilities': ("Set of CUDA compute capabilities to use when building GPU software. " + "Values must be specified as digits separated by a dot, " + "for example: 3.5,5.0,7.2. Order does not matter. " + "EasyBuild will (where possible) compile fat " "binaries with support for (at least) all requested CUDA compute " "capabilities, and PTX code for the highest CUDA compute capability (for " "forwards compatibility). The check on this behavior may be relaxed using "