easybuilders · tanmoy1989 · Nov 20, 2024 · Jan 20, 2025 · Jan 20, 2025
diff --git a/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-nvompi-2023b.eb b/easybuild/easyconfigs/f/FFTW.MPI/FFTW.MPI-3.3.10-nvompi-2023b.eb
@@ -0,0 +1,19 @@
+name = 'FFTW.MPI'
+version = '3.3.10'
+
+homepage = 'https://www.fftw.org'
+description = """FFTW is a C subroutine library for computing the discrete Fourier transform (DFT)
+in one or more dimensions, of arbitrary input size, and of both real and complex data."""
+
+toolchain = {'name': 'nvompi', 'version': '2023b'}
+toolchainopts = {'pic': True}
+
+source_urls = [homepage]
+sources = ['fftw-%(version)s.tar.gz']  # tarball is named 'fftw', not after this easyconfig's name
+checksums = ['56c932549852cddcfafdab3820b0200c7742675be92179e59e6215b340e26467']
+
+dependencies = [('FFTW', '3.3.10')]  # presumably the non-MPI FFTW build; keep in sync with 'version' above
+
+runtest = 'check'  # presumably the 'check' make target, run after the build - confirm against the easyblock
+
+moduleclass = 'numlib'
diff --git a/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb b/easybuild/easyconfigs/f/FFTW/FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb
@@ -0,0 +1,20 @@
+name = 'FFTW'
+version = '3.3.10'
+
+homepage = 'https://www.fftw.org'
+description = """FFTW is a C subroutine library for computing the discrete Fourier transform (DFT)
+in one or more dimensions, of arbitrary input size, and of both real and complex data."""
+
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
+toolchainopts = {'pic': True}
+
+source_urls = [homepage]
+sources = [SOURCELOWER_TAR_GZ]
+checksums = ['56c932549852cddcfafdab3820b0200c7742675be92179e59e6215b340e26467']
+
+# Quad-precision support does not work with the nvc compiler, so it is switched off for this toolchain
+with_quad_prec = False
+
+runtest = 'check'  # presumably the 'check' make target, run after the build - confirm against the easyblock
+
+moduleclass = 'numlib'
diff --git a/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.3.1-NVHPC-24.1-CUDA-12.4.0.eb b/easybuild/easyconfigs/f/FlexiBLAS/FlexiBLAS-3.3.1-NVHPC-24.1-CUDA-12.4.0.eb
@@ -0,0 +1,58 @@
+easyblock = 'Bundle'
+
+name = 'FlexiBLAS'
+version = '3.3.1'
+
+homepage = 'https://gitlab.mpi-magdeburg.mpg.de/software/flexiblas-release'
+description = """FlexiBLAS is a wrapper library that enables the exchange of the BLAS and LAPACK implementation
+used by a program without recompiling or relinking it."""
+
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
+local_extra_flags = "-D__ELF__"  # NOTE(review): reason not recorded; presumably needed with the NVHPC compilers
+toolchainopts = {'pic': True, 'extra_cflags': local_extra_flags, 'extra_fflags': local_extra_flags}
+
+builddependencies = [
+    ('CMake', '3.27.6'),
+    ('Python', '3.11.5'),  # required for running the tests
+]
+
+dependencies = [
+    ('OpenBLAS', '0.3.24'),
+]
+
+# note: first listed backend will be used as default by FlexiBLAS,
+# unless otherwise specified via easyconfig parameter flexiblas_default
+local_backends = ['OpenBLAS']
+
+# imkl supplies its backend via the imkl module, not as a dependency
+if ARCH == 'x86_64':
+    local_backends.append('imkl')
+
+default_component_specs = {'start_dir': '%(namelower)s-%(version)s'}  # default for each component listed below
+sanity_check_all_components = True
+
+# Also build and install LAPACKE (headers under include/flexiblas), which FlexiBLAS does not support yet
+components = [
+    (name, version, {
+        'source_urls':
+        ['https://gitlab.mpi-magdeburg.mpg.de/api/v4/projects/386/packages/generic/flexiblas-source/v%(version)s/'],
+        'sources': [SOURCELOWER_TAR_GZ],
+        'checksums': ['bbeebf5e5a006924558fec43f49affbe1aaa4cbacfc472a9ff6066ffda142e18'],
+        'backends': local_backends,
+    }),
+    ('LAPACK', '3.11.0', {
+        'easyblock': 'CMakeMake',
+        'source_urls': ['https://github.com/Reference-LAPACK/lapack/archive/'],
+        'sources': ['v%(version)s.tar.gz'],
+        'checksums': ['4b9ba79bfd4921ca820e83979db76ab3363155709444a787979e81c22285ffa9'],
+        'configopts': ('-DBUILD_SHARED_LIBS=ON -DUSE_OPTIMIZED_BLAS=ON -DLAPACKE=ON '
+                       '-DUSE_OPTIMIZED_LAPACK=ON -DBUILD_DEPRECATED=ON '
+                       '-DCMAKE_INSTALL_INCLUDEDIR=%(installdir)s/include/flexiblas'),
+        'sanity_check_paths': {
+            'files': ['lib/liblapacke.%s' % SHLIB_EXT, 'include/flexiblas/lapacke.h'],
+            'dirs': [],
+        },
+    }),
+]
+
+moduleclass = 'lib'
diff --git a/easybuild/easyconfigs/n/NVHPC/NVHPC-24.1-CUDA-12.4.0.eb b/easybuild/easyconfigs/n/NVHPC/NVHPC-24.1-CUDA-12.4.0.eb
@@ -0,0 +1,80 @@
+name = 'NVHPC'
+version = '24.1'
+versionsuffix = '-CUDA-%(cudaver)s'
+
+homepage = 'https://developer.nvidia.com/hpc-sdk/'
+description = """C, C++ and Fortran compilers included with the NVIDIA HPC SDK (previously: PGI)"""
+
+toolchain = SYSTEM
+
+local_tarball_tmpl = 'nvhpc_2024_%%(version_major)s%%(version_minor)s_Linux_%s_cuda_multi.tar.gz'
+# By downloading, you accept the HPC SDK Software License Agreement
+# https://docs.nvidia.com/hpc-sdk/eula/index.html
+accept_eula = True
+source_urls = ['https://developer.download.nvidia.com/hpc-sdk/%(version)s/']
+sources = [local_tarball_tmpl % '%(arch)s']  # fills the %s slot; the %%-escaped version templates remain
+checksums = [
+    {
+        local_tarball_tmpl % 'aarch64':
+            '8c2ce561d5901a03eadce7f07dce5fbc55e8e88c87b74cf60e01e2eca231c41c',
+        local_tarball_tmpl % 'ppc64le':
+            'e7330eb35e23dcd9b0b3bedc67c0d5443c4fd76b59caa894a76ecb0d17f71f43',
+        local_tarball_tmpl % 'x86_64':
+            '27992e5fd56af8738501830daddc5e9510ebd553326fea8730236fee4f0f1dd8',
+    }
+]
+
+local_gccver = '13.2.0'
+dependencies = [
+    ('GCCcore', local_gccver),
+    ('binutils', '2.40', '', ('GCCcore', local_gccver)),
+    # This is necessary to avoid cases where just libnuma.so.1 is present in the system and -lnuma fails
+    ('numactl', '2.0.16', '', ('GCCcore', local_gccver)),
+    ('CUDA', '12.4.0', '', SYSTEM),
+]
+
+module_add_cuda = False  # CUDA is listed as a dependency above, so NVHPC's bundled CUDA is not added
+
+# specify default CUDA version that should be used by NVHPC
+# should match one of the CUDA versions that are included with this NVHPC version
+# (see install_components/Linux_x86_64/$version/cuda/) where $version is the NVHPC version
+# this version can be tweaked from the EasyBuild command line with
+# --try-amend=default_cuda_version="11.0" (for example)
+default_cuda_version = '%(cudaver)s'  # presumably resolves to the version of the CUDA dependency - confirm
+
+# NVHPC EasyBlock supports some features, which can be set via CLI or this easyconfig.
+# The following list gives examples for the easyconfig
+#
+# NVHPC needs CUDA to work. Two options are available: 1) Use NVHPC-bundled CUDA, 2) use system CUDA
+# 1) Bundled CUDA
+#    If no easybuild dependency to CUDA is present, the bundled CUDA is taken. A version needs to be specified with
+#      default_cuda_version = "11.0"
+#    in this easyconfig file; alternatively, it can be specified through the command line during installation with
+#      --try-amend=default_cuda_version="10.2"
+# 2) CUDA provided via EasyBuild
+#    Use CUDA as a dependency, for example
+#      dependencies = [('CUDA', '11.5.0')]
+#    The parameter default_cuda_version still can be set as above.
+#    If not set, it will be deduced from the CUDA module (via $EBVERSIONCUDA)
+#
+# Define a NVHPC-default Compute Capability
+#   cuda_compute_capabilities = "8.0"
+# Can also be specified on the EasyBuild command line via --cuda-compute-capabilities=8.0
+# Only single values supported, not lists of values!
+#
+# Options to add/remove things to/from environment module (defaults shown)
+#   module_byo_compilers = False  # Remove compilers from PATH (Bring-your-own compilers)
+#   module_nvhpc_own_mpi = False  # Add NVHPC's own pre-compiled OpenMPI
+#   module_add_math_libs = False  # Add NVHPC's math libraries (which should be there from CUDA anyway)
+#   module_add_profilers = False  # Add NVHPC's NVIDIA Profilers
+#   module_add_nccl = False       # Add NVHPC's NCCL library
+#   module_add_nvshmem = False    # Add NVHPC's NVSHMEM library
+#   module_add_cuda = False       # Add NVHPC's bundled CUDA
+modextrapaths = {  # %(arch)s instead of a fixed x86_64, matching the per-architecture source tarballs
+    'LD_LIBRARY_PATH': ['Linux_%(arch)s/%(version)s/compilers/extras/qd/lib'],
+    'LIBRARY_PATH': ['Linux_%(arch)s/%(version)s/compilers/extras/qd/lib'],
+}
+
+# this bundle serves as a compiler-only toolchain, so it should be marked as compiler (important for HMNS)
+moduleclass = 'compiler'
diff --git a/easybuild/easyconfigs/n/nvofbf/nvofbf-2023b.eb b/easybuild/easyconfigs/n/nvofbf/nvofbf-2023b.eb
@@ -0,0 +1,29 @@
+# This file is an EasyBuild reciPY as per https://easybuilders.github.io/easybuild/
+# Author: Tanmoy Chakraborty (University of Warwick)
+# Email: tanmoy.chakraborty@warwick.ac.uk
+
+easyblock = 'Toolchain'
+
+name = 'nvofbf'
+version = '2023b'
+
+homepage = '(none)'
+description = """NVHPC based toolchain, including OpenMPI for MPI support,
+OpenBLAS (via FlexiBLAS for BLAS and LAPACK support), FFTW and ScaLAPACK."""
+
+toolchain = SYSTEM
+
+local_compiler = ('NVHPC', '24.1-CUDA-12.4.0')  # compiler-only toolchain, used for the non-MPI components
+
+local_comp_mpi_tc = ('nvompi', version)  # compiler + MPI toolchain, used for the MPI-dependent components
+
+dependencies = [
+    local_compiler,
+    ('OpenMPI', '4.1.6', '-CUDA-12.4.0', local_compiler),
+    ('FlexiBLAS', '3.3.1', '', local_compiler),
+    ('FFTW', '3.3.10', '', local_compiler),
+    ('FFTW.MPI', '3.3.10', '', local_comp_mpi_tc),
+    ('ScaLAPACK', '2.2.0', '-fb', local_comp_mpi_tc),  # '-fb' presumably marks the FlexiBLAS-based build
+]
+
+moduleclass = 'toolchain'
diff --git a/easybuild/easyconfigs/n/nvompi/nvompi-2023b.eb b/easybuild/easyconfigs/n/nvompi/nvompi-2023b.eb
@@ -0,0 +1,23 @@
+# This file is an EasyBuild reciPY as per https://easybuilders.github.io/easybuild/
+# Author: Tanmoy Chakraborty (University of Warwick)
+# Email: tanmoy.chakraborty@warwick.ac.uk
+
+easyblock = 'Toolchain'
+
+name = 'nvompi'
+version = '2023b'
+
+homepage = '(none)'
+description = 'NVHPC based compiler toolchain, including OpenMPI for MPI support.'
+
+toolchain = SYSTEM
+
+local_compiler = ('NVHPC', '24.1-CUDA-12.4.0')  # compiler that the MPI library below is built with
+
+dependencies = [
+    local_compiler,
+    ('OpenMPI', '4.1.6', '-CUDA-12.4.0', local_compiler),
+    ('CUDA', '12.4.0', '', SYSTEM),  # same CUDA version as in the NVHPC versionsuffix
+]
+
+moduleclass = 'toolchain'
diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-NVHPC-24.1-CUDA-12.4.0.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-NVHPC-24.1-CUDA-12.4.0.eb
@@ -0,0 +1,53 @@
+name = 'OpenBLAS'
+version = '0.3.24'
+
+homepage = 'http://www.openblas.net/'
+description = "OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version."
+
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
+
+source_urls = [
+    # order matters, trying to download the large.tgz/timing.tgz LAPACK tarballs from GitHub causes trouble
+    'https://www.netlib.org/lapack/timing/',
+    'https://github.com/xianyi/OpenBLAS/archive/',
+]
+sources = ['v%(version)s.tar.gz']
+patches = [
+    ('large.tgz', '.'),  # LAPACK timing tarballs (see source_urls); '.' is presumably the target directory
+    ('timing.tgz', '.'),
+    'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch',
+    'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch',
+    'OpenBLAS-0.3.21_fix-order-vectorization.patch',
+    'OpenBLAS-0.3.23_disable-xDRGES-LAPACK-test.patch',
+    'OpenBLAS-0.3.24_fix-czasum.patch',
+    'OpenBLAS-0.3.24_fix-A64FX.patch',
+]
+checksums = [
+    {'v0.3.24.tar.gz': 'ceadc5065da97bd92404cac7254da66cc6eb192679cf1002098688978d4d5132'},
+    {'large.tgz': 'f328d88b7fa97722f271d7d0cfea1c220e0f8e5ed5ff01d8ef1eb51d6f4243a1'},
+    {'timing.tgz': '999c65f8ea8bd4eac7f1c7f3463d4946917afd20a997807300fe35d70122f3af'},
+    {'OpenBLAS-0.3.15_workaround-gcc-miscompilation.patch':
+     'e6b326fb8c4a8a6fd07741d9983c37a72c55c9ff9a4f74a80e1352ce5f975971'},
+    {'OpenBLAS-0.3.20_use-xASUM-microkernels-on-new-intel-cpus.patch':
+     '1dbd0f9473963dbdd9131611b455d8a801f1e995eae82896186d3d3ffe6d5f03'},
+    {'OpenBLAS-0.3.21_fix-order-vectorization.patch':
+     '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'},
+    {'OpenBLAS-0.3.23_disable-xDRGES-LAPACK-test.patch':
+     'ab7e0af05f9b2a2ced32f3875e1e3767d9c3531a455421a38f7324350178a0ff'},
+    {'OpenBLAS-0.3.24_fix-czasum.patch': '8132b87c519fb08caa3bd7291fe8a1d0e1afe6fcb667d16f3020b46122afe20c'},
+    {'OpenBLAS-0.3.24_fix-A64FX.patch': '3712e8c3f0024c7bb327958779c388ad0234ad6d58b7b118e605256ec089964c'},
+]
+
+builddependencies = [
+    ('make', '4.4.1'),
+    # required by LAPACK test suite
+    ('Python', '3.11.5'),
+]
+
+run_lapack_tests = False  # the LAPACK test suite is switched off for this build
+max_failing_lapack_tests_num_errors = 150  # presumably only matters once run_lapack_tests is enabled - confirm
+
+# extensive testing can be enabled by uncommenting the line below
+# runtest = 'PATH=.:$PATH lapack-timing'
+
+moduleclass = 'numlib'
diff --git a/easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0-CUDA-12.4.0.eb b/easybuild/easyconfigs/o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0-CUDA-12.4.0.eb
@@ -0,0 +1,77 @@
+name = 'OpenMPI'
+version = '4.1.6'
+versionsuffix = '-CUDA-12.4.0'
+
+homepage = 'https://www.open-mpi.org/'
+description = """The Open MPI Project is an open source MPI-3 implementation."""
+
+toolchain = {'name': 'NVHPC', 'version': '24.1-CUDA-12.4.0'}
+toolchainopts = {'pic': True}
+
+source_urls = ['https://www.open-mpi.org/software/ompi/v%(version_major_minor)s/downloads']
+sources = [SOURCELOWER_TAR_BZ2]
+patches = [
+    'OpenMPI-4.1.1_build-with-internal-cuda-header.patch',
+    'OpenMPI-4.1.1_opal-datatype-cuda-performance.patch',
+    'OpenMPI-4.1.x_add_atomic_wmb.patch',
+]
+checksums = [
+    {'openmpi-4.1.6.tar.bz2': 'f740994485516deb63b5311af122c265179f5328a0d857a567b85db00b11e415'},
+    {'OpenMPI-4.1.1_build-with-internal-cuda-header.patch':
+     '63eac52736bdf7644c480362440a7f1f0ae7c7cae47b7565f5635c41793f8c83'},
+    {'OpenMPI-4.1.1_opal-datatype-cuda-performance.patch':
+     'b767c7166cf0b32906132d58de5439c735193c9fd09ec3c5c11db8d5fa68750e'},
+    {'OpenMPI-4.1.x_add_atomic_wmb.patch': '9494bbc546d661ba5189e44b4c84a7f8df30a87cdb9d96ce2e73a7c8fecba172'},
+]
+
+builddependencies = [
+    ('pkgconf', '2.0.3'),
+    ('Perl', '5.38.0'),
+    ('Autotools', '20220317'),  # provides the autoconf/automake tools called in preconfigopts below
+]
+
+dependencies = [
+    ('zlib', '1.2.13'),
+    ('hwloc', '2.9.2'),
+    ('libevent', '2.1.12'),
+    ('UCX', '1.15.0'),
+    ('libfabric', '1.19.0'),
+    ('PMIx', '4.2.6'),
+    ('UCC', '1.2.0'),
+    ('UCC-CUDA', '1.2.0', '-CUDA-%(cudaver)s'),
+]
+
+# Update configure to include changes from the "internal-cuda" patch
+# by running a subset of autogen.pl sufficient to achieve this
+# without doing the full, long-running regeneration.
+preconfigopts = ' && '.join([
+    'cd config',
+    'autom4te --language=m4sh opal_get_version.m4sh -o opal_get_version.sh',
+    'cd ..',
+    'autoconf',
+    'autoheader',
+    'aclocal',
+    'automake',
+    ''  # empty last item makes the joined string end in ' && ', presumably to chain the configure command
+])
+
+cuda_compute_capabilities = ['8.0', '8.9']  # NOTE(review): hard-coded list, presumably site-specific - confirm
+
+# CUDA support: the CUDA patches and this configure option can be removed if CUDA support isn't wanted.
+# $EBROOTCUDA is the install prefix set by the CUDA module (the NVHPC toolchain depends on CUDA, not CUDAcore).
+configopts = '--with-cuda=$EBROOTCUDA '
+
+# MPI-1 compatibility is left disabled for now (see what breaks); to enable it:
+# configopts += '--enable-mpi1-compatibility '
+
+# SLURM integration (site-specific); PMI locations can be passed as well if needed, for example:
+# configopts += '--with-slurm --with-pmi=/usr/include/slurm --with-pmi-libdir=/usr'
+configopts += '--with-slurm '
+
+modextravars = {
+    'OMPI_MCA_btl': '^ofi',  # '^' excludes the listed component: do not use ofi as BTL
+    'OMPI_MCA_mtl': '^ofi',  # likewise, do not use ofi as MTL
+    'SLURM_MPI_TYPE': 'pmix',  # presumably makes SLURM launch with PMIx by default - site-specific, confirm
+}
+
+moduleclass = 'mpi'