From 9164c61111101a2623db91900efc8ad76a9a2271 Mon Sep 17 00:00:00 2001 From: thoffman Date: Thu, 28 Nov 2024 14:42:09 +0100 Subject: [PATCH 01/32] adding easyconfigs: jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb and patches: jax-0.4.35_easyblock_compat.patch, jax-0.4.35_fix-pybind11-systemlib_cupti.patch --- .../jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 162 ++++++++++++++++++ .../j/jax/jax-0.4.35_easyblock_compat.patch | 21 +++ ...-0.4.35_fix-pybind11-systemlib_cupti.patch | 67 ++++++++ 3 files changed, 250 insertions(+) create mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb create mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.35_easyblock_compat.patch create mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb new file mode 100644 index 00000000000..2aaf549cc4f --- /dev/null +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -0,0 +1,162 @@ +# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild +# Author: Denis Kristak +# Updated by: Alex Domingo (Vrije Universiteit Brussel) +# Updated by: Pavel Tománek (INUITS) +# Updated by: Thomas Hoffmann (EMBL Heidelberg) +easyblock = 'PythonBundle' + +name = 'jax' +version = '0.4.35' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://jax.readthedocs.io/' +description = """Composable transformations of Python+NumPy programs: +differentiate, vectorize, JIT to GPU/TPU, and more""" + +toolchain = {'name': 'gfbf', 'version': '2024a'} +cuda_compute_capabilities = ["5.0", "6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "9.0"] + +builddependencies = [ + # ('Bazel', '7.4.1'), TODO: problems with py. 6.5.0 works. + ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. Fix: change to builddependency in SciPy-bundle? 
+ ('pytest-xdist', '3.6.1'), + ('git', '2.45.1'), # bazel uses git to fetch repositories + ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py + ('poetry', '1.8.3'), + ('Clang', '18.1.8') +] + +dependencies = [ + ('CUDA', '12.6.0', '', SYSTEM), # 12.6.2 ? + ('cuDNN', '9.5.0.50', versionsuffix, SYSTEM), + ('NCCL', '2.22.3', versionsuffix), + ('Python', '3.12.3'), + ('SciPy-bundle', '2024.05'), # 2024.11 ? + ('absl-py', '2.1.0'), + ('flatbuffers-python', '24.3.25'), + ('ml_dtypes', '0.5.0'), + ('zlib', '1.3.1'), + ('pybind11', '2.13.6'), # override 2.12.0. SciPy-bundle has pybind/2.12.0. Fix: + # change to builddependency in SciPy-bundle? (TODO) +] + +# downloading xla and other tarballs to avoid that Bazel downloads it during the build +local_extract_cmd = 'mkdir -p %(builddir)s/archives && cp %s %(builddir)s/archives' +# note: following commits *must* be the exact same onces used upstream +# XLA_COMMIT from jax-jaxlib: third_party/xla/workspace.bzl +local_xla_commit = '76da730179313b3bebad6dea6861768421b7358c' +# TFRT_COMMIT from xla: third_party/tsl/third_party/tf_runtime/workspace.bzl +local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25' # TODO: still required? +# TODO: add other downloads + +# Use sources downloaded by EasyBuild +_jaxlib_buildopts = '--bazel_options="--distdir=%(builddir)s/archives" ' +# Use dependencies from EasyBuild +_jaxlib_buildopts += '--bazel_options="--action_env=TF_SYSTEM_LIBS=pybind11" ' +_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include:$EBROOTCUDA/extras/CUPTI/include" ' +# Avoid warning (treated as error) in upb/table.c +_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" ' # TODO: still required? +# _jaxlib_buildopts += '--nouse_clang ' #TODO: avoid clang (?) 
+_jaxlib_buildopts += '--cuda_version=%(cudaver)s ' +_jaxlib_buildopts += '--python_bin_path=$EBROOTPYTHON/bin/python3 ' +# Do not use hermetic CUDA/cuDNN/NCCL: (requires action_env=CPATH=$EBROOTCUDA/extras/CUPTI/include"; +# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include ): +_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDNN_PATH="$EBROOTCUDNN" """ +_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" """ +_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """ +_jaxlib_buildopts += """--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/CUPTI/include" """ + +# get rid of .devDate versionsuffix: TODO: find a better way +# _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """ does not work (?) +_no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """ +_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """ # required? 
+ +components = [ + ('jaxlib', version, { + 'sources': [ + { + 'source_urls': ['https://github.com/google/jax/archive/'], + 'filename': 'jax-v%(version)s.tar.gz', + }, + { + 'source_urls': ['https://github.com/openxla/xla/archive'], + 'download_filename': '%s.tar.gz' % local_xla_commit, + 'filename': 'xla-%s.tar.gz' % local_xla_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + { + 'source_urls': ['https://github.com/tensorflow/runtime/archive'], + 'download_filename': '%s.tar.gz' % local_tfrt_commit, + 'filename': 'tf_runtime-%s.tar.gz' % local_tfrt_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + ], + 'patches': [ + 'jax-0.4.35_easyblock_compat.patch', + 'jax-0.4.35_fix-pybind11-systemlib_cupti.patch', + 'jax-v0.4.35_version.patch', + ], + 'checksums': [ + {'jax-v0.4.35.tar.gz': + '65e086708ae56670676b7b2340ad82b901d8c9993d1241a839c8990bdb8d6212'}, + {'xla-76da7301.tar.gz': + 'd67ced09b69ab8d7b26fa4cd5f48b22db57eb330294a35f6e1d462ee17066757'}, + {'tf_runtime-0aeefb16.tar.gz': + 'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'}, + {'jax-0.4.35_easyblock_compat.patch': + 'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'}, + {'jax-0.4.35_fix-pybind11-systemlib_cupti.patch': + '78efe6b5108a5da1935258286c94dea8438fd03651533c34023eeba27f514130'}, + ], + 'start_dir': 'jax-jax-v%(version)s', + 'buildopts': _jaxlib_buildopts, + 'prebuildopts': ' mkdir third_party/gpus/cuda/extras/ -p && ' + + 'ln -s $EBROOTCUDA/extras/CUPTI third_party/gpus/cuda/extras --relative &&' + + _no_devtag + }), +] + +# Some tests require an isolated run: TODO: still required? 
+local_isolated_tests = [ + 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_scan_custom_jvp', + 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_transforms_doc', + 'tests/lax_scipy_special_functions_test.py::LaxScipySpcialFunctionsTest' + + '::testScipySpecialFun_gammainc_s_2x1x4_float32_float32', +] +# deliberately not testing in parallel, as that results in (additional) failing tests; +# use XLA_PYTHON_CLIENT_ALLOCATOR=platform to allocate and deallocate GPU memory during testing, +# see https://github.com/google/jax/issues/7323 and +# https://github.com/google/jax/blob/main/docs/gpu_memory_allocation.rst; +# use CUDA_VISIBLE_DEVICES=0 to avoid failing tests on systems with multiple GPUs; +# use NVIDIA_TF32_OVERRIDE=0 to avoid loosing numerical precision by disabling TF32 Tensor Cores; +local_test_exports = [ + "NVIDIA_TF32_OVERRIDE=0", + "CUDA_VISIBLE_DEVICES=0", + "XLA_PYTHON_CLIENT_ALLOCATOR=platform", + "JAX_ENABLE_X64=true", +] +local_test = ''.join(['export %s;' % x for x in local_test_exports]) +# run all tests at once except for local_isolated_tests: +local_test += "pytest -vv tests %s && " % ' '.join(['--deselect %s' % x for x in local_isolated_tests]) +# run remaining local_isolated_tests separately: +local_test += ' && '.join(['pytest -vv %s' % x for x in local_isolated_tests]) + +use_pip = True + +exts_list = [ + (name, version, { + 'source_tmpl': '%(name)s-v%(version)s.tar.gz', + 'source_urls': ['https://github.com/google/jax/archive/'], + # 'patches': ['jax-0.4.25_fix_env_test_no_log_spam.patch'], # TODO: still required? update? 
+ 'patches': ['jax-v0.4.35_version.patch'], + 'checksums': [ + {'jax-v0.4.35.tar.gz': '65e086708ae56670676b7b2340ad82b901d8c9993d1241a839c8990bdb8d6212'}, + ], + 'runtest': local_test, + 'preinstallopts': _no_devtag + }), +] + +sanity_pip_check = True + +moduleclass = 'ai' diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35_easyblock_compat.patch b/easybuild/easyconfigs/j/jax/jax-0.4.35_easyblock_compat.patch new file mode 100644 index 00000000000..5ea951be927 --- /dev/null +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35_easyblock_compat.patch @@ -0,0 +1,21 @@ +# Thomas Hoffmann, EMBL Heidelberg, structures-it@embl.de, 2024/11 +# add dummy parameters to build/build.py for cudnn_path and cuda_path, which are set by default by the jaxlib easyblock. +diff -ru jax-jax-v0.4.35/build/build.py jax-jax-v0.4.35_easyblockcompat/build/build.py +--- jax-jax-v0.4.35/build/build.py 2024-10-22 21:00:23.000000000 +0200 ++++ jax-jax-v0.4.35_easyblockcompat/build/build.py 2024-11-19 12:35:46.524479324 +0100 +@@ -549,6 +549,15 @@ + help_str="Same as update_requirements, but will consider dev, nightly " + "and pre-release versions of packages.") + ++ parser.add_argument( ++ "--cuda_path", ++ default="dummy", ++ help="compatibility with jaxlib.py easyblock") ++ parser.add_argument( ++ "--cudnn_path", ++ default="dummy", ++ help="compatibility with jaxlib.py easyblock") ++ + args = parser.parse_args() + + logging.basicConfig() diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch b/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch new file mode 100644 index 00000000000..265f9d48636 --- /dev/null +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch @@ -0,0 +1,67 @@ +jax-0.4.25_fix-pybind11-systemlib.patch: Add missing value for System Pybind11 Bazel config +jax-0.4.25_fix-pybind11-systemlib.patch: Author: Alexander Grund (TU Dresden) + +THEMBL: fix cupti include path. 
+ +diff --git a/third_party/xla/fix-pybind11-systemlib.patch b/third_party/xla/fix-pybind11-systemlib.patch +new file mode 100644 +index 000000000..68bd2063d +--- /dev/null ++++ b/third_party/xla/fix-pybind11-systemlib.patch +@@ -0,0 +1,13 @@ ++--- xla-orig/third_party/tsl/third_party/systemlibs/pybind11.BUILD +++++ xla-4ccfe33c71665ddcbca5b127fefe8baa3ed632d4/third_party/tsl/third_party/systemlibs/pybind11.BUILD ++@@ -6,3 +6,10 @@ ++ "@tsl//third_party/python_runtime:headers", ++ ], ++ ) +++ +++# Needed by pybind11_bazel. +++config_setting( +++ name = "osx", +++ constraint_values = ["@platforms//os:osx"], +++) +++ +diff -ruN jax-jax-v0.4.35/jaxlib/gpu/vendor.h jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/jaxlib/gpu/vendor.h +--- jax-jax-v0.4.35/jaxlib/gpu/vendor.h 2024-10-22 21:00:23.000000000 +0200 ++++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/jaxlib/gpu/vendor.h 2024-11-26 10:56:20.396087442 +0100 +@@ -23,7 +23,7 @@ + #if defined(JAX_GPU_CUDA) + + // IWYU pragma: begin_exports +-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" ++#include + #include "third_party/gpus/cuda/include/cooperative_groups.h" + #include "third_party/gpus/cuda/include/cuComplex.h" + #include "third_party/gpus/cuda/include/cublas_v2.h" +diff -ruN jax-jax-v0.4.35/third_party/xla/workspace.bzl jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/workspace.bzl +--- jax-jax-v0.4.35/third_party/xla/workspace.bzl 2024-10-22 21:00:23.000000000 +0200 ++++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/workspace.bzl 2024-11-27 12:17:37.913466273 +0100 +@@ -30,6 +30,11 @@ + sha256 = XLA_SHA256, + strip_prefix = "xla-{commit}".format(commit = XLA_COMMIT), + urls = tf_mirror_urls("https://github.com/openxla/xla/archive/{commit}.tar.gz".format(commit = XLA_COMMIT)), ++ patch_file = [ ++ "//third_party/xla:xla-76da73_cupti.patch", ++ "//third_party/xla:fix-pybind11-systemlib.patch", ++ ], ++ + ) + + # For development, one often wants 
to make changes to the TF repository as well +diff -ruN jax-jax-v0.4.35/third_party/xla/xla-76da73_cupti.patch jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/xla-76da73_cupti.patch +--- jax-jax-v0.4.35/third_party/xla/xla-76da73_cupti.patch 1970-01-01 01:00:00.000000000 +0100 ++++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/xla-76da73_cupti.patch 2024-11-27 12:18:26.668582799 +0100 +@@ -0,0 +1,12 @@ ++diff -ru xla-76da730179313b3bebad6dea6861768421b7358c/xla/tsl/cuda/cupti_stub.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti/xla/tsl/cuda/cupti_stub.cc ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/tsl/cuda/cupti_stub.cc 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti/xla/tsl/cuda/cupti_stub.cc 2024-11-26 12:04:11.695539146 +0100 ++@@ -13,7 +13,7 @@ ++ limitations under the License. ++ ==============================================================================*/ ++ ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include ++ #include "third_party/gpus/cuda/include/cuda.h" ++ #include "tsl/platform/dso_loader.h" ++ #include "tsl/platform/load_library.h" From 179493f9cb1079574362b56ecba4a93d4dbf98c9 Mon Sep 17 00:00:00 2001 From: thoffman Date: Thu, 28 Nov 2024 14:47:59 +0100 Subject: [PATCH 02/32] add ml_dtypes v0.5.0 --- .../m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb diff --git a/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb b/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb new file mode 100644 index 00000000000..5a20f1d8458 --- /dev/null +++ b/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb @@ -0,0 +1,56 @@ +# Thomas Hoffmann, EMBL Heidelberg, structures-it@embl.de, 2024/11 +easyblock = 'PythonBundle' + +name = 'ml_dtypes' +version = '0.5.0' + +homepage 
= 'https://github.com/jax-ml/ml_dtypes' +description = """ +ml_dtypes is a stand-alone implementation of several NumPy dtype extensions used +in machine learning libraries, including: + +bfloat16: an alternative to the standard float16 format +float8_*: several experimental 8-bit floating point representations including: +float8_e4m3b11fnuz +float8_e4m3fn +float8_e4m3fnuz +float8_e5m2 +float8_e5m2fnuz +""" + +toolchain = {'name': 'gfbf', 'version': '2024a'} + +builddependencies = [ + ('poetry', '1.8.3'), +] + +dependencies = [ + ('Python', '3.12.3'), + # ('SciPy-bundle', '2024.11'), ? + ('SciPy-bundle', '2024.05'), +] + + +use_pip = True + +default_easyblock = 'PythonPackage' + +exts_list = [ + ('opt_einsum', '3.4.0', { + 'checksums': ['96ca72f1b886d148241348783498194c577fa30a8faac108586b14f1ba4473ac'], + }), + ('etils', '1.10.0', { + 'checksums': ['4eaa9d7248fd4eeb75e44d47ca29875a5ccea044cc14a17435794bf8ac116a05'], + }), + (name, version, { + 'patches': [('ml_dtypes-0.3.2_EigenAvx512.patch', 1)], + 'checksums': [ + {'ml_dtypes-0.5.0.tar.gz': '3e7d3a380fe73a63c884f06136f8baa7a5249cc8e9fdec677997dd78549f8128'}, + {'ml_dtypes-0.3.2_EigenAvx512.patch': '197b05b0b7f611749824369f026099f6a172f9e8eab6ebb6504a16573746c892'}, + ], + }), +] + +sanity_pip_check = True + +moduleclass = 'tools' From d9668f21e0fc8d2771567e8626e53096e19fb9d5 Mon Sep 17 00:00:00 2001 From: thoffman Date: Thu, 28 Nov 2024 15:16:57 +0100 Subject: [PATCH 03/32] fix style --- .../easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb index 2aaf549cc4f..3b076a03237 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -69,7 +69,7 @@ _jaxlib_buildopts += """--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/C 
# get rid of .devDate versionsuffix: TODO: find a better way # _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """ does not work (?) _no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """ -_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """ # required? +_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """ # required? components = [ ('jaxlib', version, { From 794c15d0c230f3583da5e0aa1bdc247208991fa2 Mon Sep 17 00:00:00 2001 From: thoffman Date: Thu, 28 Nov 2024 18:48:00 +0100 Subject: [PATCH 04/32] checksums; add missing patch --- .../jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 7 +++++-- .../j/jax/jax-0.4.35_version.patch | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.35_version.patch diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb index 3b076a03237..0ea4ab7fa0b 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -94,7 +94,7 @@ components = [ 'patches': [ 'jax-0.4.35_easyblock_compat.patch', 'jax-0.4.35_fix-pybind11-systemlib_cupti.patch', - 'jax-v0.4.35_version.patch', + 'jax-0.4.35_version.patch', ], 'checksums': [ {'jax-v0.4.35.tar.gz': @@ -107,6 +107,8 @@ components = [ 'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'}, {'jax-0.4.35_fix-pybind11-systemlib_cupti.patch': '78efe6b5108a5da1935258286c94dea8438fd03651533c34023eeba27f514130'}, + {'jax-0.4.35_version.patch': + 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, ], 'start_dir': 'jax-jax-v%(version)s', 'buildopts': _jaxlib_buildopts, @@ -148,9 +150,10 @@ exts_list = [ 'source_tmpl': '%(name)s-v%(version)s.tar.gz', 'source_urls': ['https://github.com/google/jax/archive/'], # 'patches': 
['jax-0.4.25_fix_env_test_no_log_spam.patch'], # TODO: still required? update? - 'patches': ['jax-v0.4.35_version.patch'], + 'patches': ['jax-0.4.35_version.patch'], 'checksums': [ {'jax-v0.4.35.tar.gz': '65e086708ae56670676b7b2340ad82b901d8c9993d1241a839c8990bdb8d6212'}, + {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, ], 'runtest': local_test, 'preinstallopts': _no_devtag diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35_version.patch b/easybuild/easyconfigs/j/jax/jax-0.4.35_version.patch new file mode 100644 index 00000000000..e5853c16ab4 --- /dev/null +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35_version.patch @@ -0,0 +1,19 @@ +diff -ru jax-jax-v0.4.35/jax/version.py jax-jax-v0.4.35_version/jax/version.py +--- jax-jax-v0.4.35/jax/version.py 2024-10-22 21:00:23.000000000 +0200 ++++ jax-jax-v0.4.35_version/jax/version.py 2024-11-28 13:10:52.508536023 +0100 +@@ -33,6 +33,7 @@ + def _get_version_string() -> str: + # The build/source distribution for jax & jaxlib overwrites _release_version. + # In this case we return it directly. 
++ return _version + if _release_version is not None: + return _release_version + return _version_from_git_tree(_version) or _version_from_todays_date(_version) +@@ -71,6 +72,7 @@ + - if JAX_NIGHTLY or JAXLIB_NIGHTLY are set: version looks like "0.4.16.dev20230906" + - if none are set: version looks like "0.4.16.dev20230906+ge58560fdc + """ ++ return _version + if _release_version is not None: + return _release_version + if os.environ.get('JAX_NIGHTLY') or os.environ.get('JAXLIB_NIGHTLY'): From 572440753a568f06f75dea335332dda0ad58eb06 Mon Sep 17 00:00:00 2001 From: thoffman Date: Thu, 28 Nov 2024 18:54:55 +0100 Subject: [PATCH 05/32] borrow pybind11/2.13.6 from PR #21864 --- .../p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 easybuild/easyconfigs/p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb diff --git a/easybuild/easyconfigs/p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb b/easybuild/easyconfigs/p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb new file mode 100644 index 00000000000..2e698fa2bb7 --- /dev/null +++ b/easybuild/easyconfigs/p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb @@ -0,0 +1,33 @@ +name = 'pybind11' +version = '2.13.6' + +homepage = 'https://pybind11.readthedocs.io' +description = """pybind11 is a lightweight header-only library that exposes C++ types in Python and vice versa, + mainly to create Python bindings of existing C++ code.""" + +toolchain = {'name': 'GCC', 'version': '13.3.0'} + +source_urls = ['https://github.com/pybind/pybind11/archive/'] +sources = ['v%(version)s.tar.gz'] +patches = [ + 'pybind11-2.10.3_require-catch.patch', +] +checksums = [ + {'v2.13.6.tar.gz': 'e08cb87f4773da97fa7b5f035de8763abc656d87d5773e62f6da0587d1f0ec20'}, + {'pybind11-2.10.3_require-catch.patch': '4a27ba3ef1d5c535d120d6178a6e876ae678e4899a07500aab37908357b0b60b'}, +] + +builddependencies = [ + ('CMake', '3.29.3'), + # Test dependencies + ('Eigen', '3.4.0'), + ('Catch2', '2.13.10'), + ('Python-bundle-PyPI', 
'2024.06'), # to provide pytest +] + +dependencies = [ + ('Boost', '1.85.0'), + ('Python', '3.12.3'), +] + +moduleclass = 'lib' From b910ecb8afde323b88fbd2e78e67a4ffe84be31e Mon Sep 17 00:00:00 2001 From: thoffman Date: Fri, 29 Nov 2024 11:50:23 +0100 Subject: [PATCH 06/32] temporarily add pytest-xdist from #21879 --- .../pytest-xdist-3.6.1-GCCcore-13.3.0.eb | 64 +++++++++++++++++ .../p/pytest/pytest-8.3.3-GCCcore-13.3.0.eb | 72 +++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 easybuild/easyconfigs/p/pytest-xdist/pytest-xdist-3.6.1-GCCcore-13.3.0.eb create mode 100644 easybuild/easyconfigs/p/pytest/pytest-8.3.3-GCCcore-13.3.0.eb diff --git a/easybuild/easyconfigs/p/pytest-xdist/pytest-xdist-3.6.1-GCCcore-13.3.0.eb b/easybuild/easyconfigs/p/pytest-xdist/pytest-xdist-3.6.1-GCCcore-13.3.0.eb new file mode 100644 index 00000000000..254e2c6a8b9 --- /dev/null +++ b/easybuild/easyconfigs/p/pytest-xdist/pytest-xdist-3.6.1-GCCcore-13.3.0.eb @@ -0,0 +1,64 @@ +easyblock = 'PythonBundle' + +name = 'pytest-xdist' +version = '3.6.1' + +homepage = 'https://github.com/pytest-dev/pytest-xdist' +description = """xdist: pytest distributed testing plugin + +The pytest-xdist plugin extends pytest with some unique test execution modes: + + * test run parallelization: if you have multiple CPUs or hosts you + can use those for a combined test run. This allows to speed up + development or to use special resources of remote machines. + + * --looponfail: run your tests repeatedly in a subprocess. After + each run pytest waits until a file in your project changes and + then re-runs the previously failing tests. This is repeated + until all tests pass after which again a full run is + performed. + + * Multi-Platform coverage: you can specify different Python + interpreters or different platforms and run tests in parallel on + all of them. + +Before running tests remotely, pytest efficiently “rsyncs” your +program source code to the remote place. 
All test results are reported +back and displayed to your local terminal. You may specify different +Python versions and interpreters.""" + +toolchain = {'name': 'GCCcore', 'version': '13.3.0'} + +builddependencies = [ + ('binutils', '2.42'), + ('hatchling', '1.24.2'), +] + +dependencies = [ + ('Python', '3.12.3'), + ('pytest', '8.3.3'), +] + +use_pip = True +sanity_pip_check = True + +exts_list = [ + ('apipkg', '3.0.2', { + 'checksums': ['c7aa61a4f82697fdaa667e70af1505acf1f7428b1c27b891d204ba7a8a3c5e0d'], + }), + ('execnet', '2.1.1', { + 'checksums': ['5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3'], + }), + (name, version, { + 'modulename': 'xdist', + 'source_tmpl': 'pytest_xdist-%(version)s.tar.gz', + 'checksums': ['ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d'], + }), +] + +sanity_check_paths = { + 'files': [], + 'dirs': ['lib/python%(pyshortver)s/site-packages'], +} + +moduleclass = 'tools' diff --git a/easybuild/easyconfigs/p/pytest/pytest-8.3.3-GCCcore-13.3.0.eb b/easybuild/easyconfigs/p/pytest/pytest-8.3.3-GCCcore-13.3.0.eb new file mode 100644 index 00000000000..0701b7af348 --- /dev/null +++ b/easybuild/easyconfigs/p/pytest/pytest-8.3.3-GCCcore-13.3.0.eb @@ -0,0 +1,72 @@ +easyblock = 'PythonBundle' + +name = 'pytest' +version = '8.3.3' + +homepage = 'https://docs.pytest.org/en/latest/' +description = """The pytest framework makes it easy to write small, +readable tests, and can scale to support complex functional testing for +applications and libraries.""" + +toolchain = {'name': 'GCCcore', 'version': '13.3.0'} + +builddependencies = [ + ('binutils', '2.42'), + ('hatchling', '1.24.2'), +] + +dependencies = [ + ('Python', '3.12.3'), + ('Python-bundle-PyPI', '2024.06'), +] + +use_pip = True + +exts_default_options = {'source_urls': [PYPI_LOWER_SOURCE]} + +# Note! Some of the file system related tests may fail on shared file systems. 
+# Notably TestPOSIXLocalPath.test_copy_stat_file, TestPOSIXLocalPath.test_copy_stat_dir +# and test_source_mtime_long_long are known to fail on GPFS +# Build with buildpath and tmpdir set to a local file system to avoid this +# or use --ignore-test-failures +_skip_tests = [ + 'testing/io/test_terminalwriter.py', + 'testing/test_terminal.py', + 'testing/test_debugging.py', + 'testing/test_config.py', + 'testing/test_helpconfig.py', +] +_ignore_tests = ' --ignore='.join(_skip_tests) + +exts_list = [ + ('setuptools-scm', '8.1.0', { + 'source_tmpl': 'setuptools_scm-%(version)s.tar.gz', + 'checksums': ['42dea1b65771cba93b7a515d65a65d8246e560768a66b9106a592c8e7f26c8a7'], + }), + ('flit-core', '3.10.1', { + 'source_tmpl': 'flit_core-%(version)s.tar.gz', + 'checksums': ['66e5b87874a0d6e39691f0e22f09306736b633548670ad3c09ec9db03c5662f7'], + }), + ('hypothesis', '6.119.0', { + 'checksums': ['ca441c6ef55d17f27f642fa08657e80f9c13d9da7ae191c8ad58fbc2f16acd1b'], + }), + ('elementpath', '4.6.0', { + 'checksums': ['ba46bf07f66774927727ade55022b6c435fac06b2523cb3cd7689a1884d33468'], + }), + ('xmlschema', '3.4.3', { + 'checksums': ['0c638dac81c7d6c9da9a8d7544402c48cffe7ee0e13cc47fc0c18794d1395dfb'], + }), + (name, version, { + 'checksums': ['70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181'], + }), +] + +sanity_check_commands = [ + "python -c 'import pytest'", + 'cd %%(builddir)s/%%(name)s/%%(name)s-%%(version)s && %%(installdir)s/bin/pytest --ignore=%s testing' + % _ignore_tests, +] + +sanity_pip_check = True + +moduleclass = 'lib' From aa1ab42bd5e63de4dfd95921e64a56ffba094ac5 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Fri, 29 Nov 2024 12:06:29 +0100 Subject: [PATCH 07/32] Update jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb - comment bazel 7 problem. - temporarily switch off tests. 
--- .../easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb index 0ea4ab7fa0b..d2dd4f2261c 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -17,7 +17,9 @@ toolchain = {'name': 'gfbf', 'version': '2024a'} cuda_compute_capabilities = ["5.0", "6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "9.0"] builddependencies = [ - # ('Bazel', '7.4.1'), TODO: problems with py. 6.5.0 works. + # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: + # Error in fail: interpreter_path must be an absolute path + # Bazel 6.5.0 (download) works. ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. Fix: change to builddependency in SciPy-bundle? ('pytest-xdist', '3.6.1'), ('git', '2.45.1'), # bazel uses git to fetch repositories @@ -155,7 +157,8 @@ exts_list = [ {'jax-v0.4.35.tar.gz': '65e086708ae56670676b7b2340ad82b901d8c9993d1241a839c8990bdb8d6212'}, {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, ], - 'runtest': local_test, + # 'runtest': local_test, + 'runtest': False, # tmp 'preinstallopts': _no_devtag }), ] From 44ebc27ae921e240660b0103491a5adab4f7d1a9 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Fri, 29 Nov 2024 16:20:04 +0100 Subject: [PATCH 08/32] Update easyconfigs.py alt dep pybind11 --- test/easyconfigs/easyconfigs.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/easyconfigs/easyconfigs.py b/test/easyconfigs/easyconfigs.py index 4b1c4dd770e..de65f9648a8 100644 --- a/test/easyconfigs/easyconfigs.py +++ b/test/easyconfigs/easyconfigs.py @@ -613,6 +613,9 @@ def check_dep_vars(self, gen, dep, dep_vars): # OpenFOAM 5.0 
requires older ParaView, CFDEMcoupling depends on OpenFOAM 5.0 (r'5\.4\.1', [r'CFDEMcoupling-3\.8\.0', r'OpenFOAM-5\.0-20180606']), ], + 'pybind11': [ + ('2.13.6;', ['jax-0.4.35']), + ], 'pydantic': [ # GTDB-Tk v2.3.2 requires pydantic 1.x (see https://github.com/Ecogenomics/GTDBTk/pull/530) ('1.10.13;', ['GTDB-Tk-2.3.2-', 'GTDB-Tk-2.4.0-']), From 31eeb756cd3cd48cb699f96f7e917cb4772ae0e4 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Fri, 29 Nov 2024 22:14:19 +0100 Subject: [PATCH 09/32] Update easyconfigs.py --- test/easyconfigs/easyconfigs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/easyconfigs/easyconfigs.py b/test/easyconfigs/easyconfigs.py index de65f9648a8..58fd227fdfb 100644 --- a/test/easyconfigs/easyconfigs.py +++ b/test/easyconfigs/easyconfigs.py @@ -614,7 +614,7 @@ def check_dep_vars(self, gen, dep, dep_vars): (r'5\.4\.1', [r'CFDEMcoupling-3\.8\.0', r'OpenFOAM-5\.0-20180606']), ], 'pybind11': [ - ('2.13.6;', ['jax-0.4.35']), + ('2.13.6;', ['jax-0.4.35-']), ], 'pydantic': [ # GTDB-Tk v2.3.2 requires pydantic 1.x (see https://github.com/Ecogenomics/GTDBTk/pull/530) From 39e03a30b76e297bd697c3dcb3fb3a9f0b54c999 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Sat, 30 Nov 2024 08:33:26 +0100 Subject: [PATCH 10/32] Update jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb Temp. 
mv Pybind11 from builddep to dep --- .../easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb index d2dd4f2261c..1043ab4fdb4 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -20,7 +20,7 @@ builddependencies = [ # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: # Error in fail: interpreter_path must be an absolute path # Bazel 6.5.0 (download) works. - ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. Fix: change to builddependency in SciPy-bundle? + # ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. Fix: change to builddependency in SciPy-bundle? ('pytest-xdist', '3.6.1'), ('git', '2.45.1'), # bazel uses git to fetch repositories ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py From e6bb0e0e361fb7b467e9de04af1fd5713863cf44 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Mon, 2 Dec 2024 08:46:40 +0100 Subject: [PATCH 11/32] Update jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb --- .../easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb index 1043ab4fdb4..b29ddcea33f 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -20,7 +20,9 @@ builddependencies = [ # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: # Error in fail: interpreter_path must be an absolute path # Bazel 6.5.0 (download) works. 
- # ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. Fix: change to builddependency in SciPy-bundle? + # ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. + # Fix: change to builddependency in SciPy-bundle? + # tmporarily mv to dependencies (TODO: mv back) ('pytest-xdist', '3.6.1'), ('git', '2.45.1'), # bazel uses git to fetch repositories ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py From 086c5ecc07e035edbbfa304bff1c29258dbf6685 Mon Sep 17 00:00:00 2001 From: thoffman Date: Mon, 2 Dec 2024 10:09:15 +0100 Subject: [PATCH 12/32] temporarily add SciPy-bundle with pybind11 builddependency --- .../s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb b/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb index 5584d888f30..b15452f6bb5 100644 --- a/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb +++ b/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb @@ -18,12 +18,12 @@ builddependencies = [ ('Ninja', '1.12.1'), ('pkgconf', '2.2.0'), # required by scipy ('Cython', '3.0.10'), # required by numpy and scipy + ('pybind11', '2.12.0'), # required by scipy ] dependencies = [ ('Python', '3.12.3'), ('Python-bundle-PyPI', '2024.06'), - ('pybind11', '2.12.0'), # required by scipy ] use_pip = True From 54916df7b039a0131b8737e2a3f0ba31acd026b5 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Mon, 2 Dec 2024 10:13:20 +0100 Subject: [PATCH 13/32] Update jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb mv pybind11 to builddependencies --- .../easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb 
index b29ddcea33f..a0c98cd7085 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -20,7 +20,7 @@ builddependencies = [ # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: # Error in fail: interpreter_path must be an absolute path # Bazel 6.5.0 (download) works. - # ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. + ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. # Fix: change to builddependency in SciPy-bundle? # tmporarily mv to dependencies (TODO: mv back) ('pytest-xdist', '3.6.1'), @@ -40,8 +40,8 @@ dependencies = [ ('flatbuffers-python', '24.3.25'), ('ml_dtypes', '0.5.0'), ('zlib', '1.3.1'), - ('pybind11', '2.13.6'), # override 2.12.0. SciPy-bundle has pybind/2.12.0. Fix: - # change to builddependency in SciPy-bundle? (TODO) + # ('pybind11', '2.13.6'), # override 2.12.0. SciPy-bundle has pybind/2.12.0. Fix: + # change to builddependency in SciPy-bundle? 
(TODO) ] # downloading xla and other tarballs to avoid that Bazel downloads it during the build From fc5b96969c026feaf10e4e009f88b892777192e8 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Mon, 2 Dec 2024 11:03:53 +0100 Subject: [PATCH 14/32] Update jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb fix style --- .../easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb index a0c98cd7085..63f290a028c 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -20,7 +20,7 @@ builddependencies = [ # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: # Error in fail: interpreter_path must be an absolute path # Bazel 6.5.0 (download) works. - ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. + ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. # Fix: change to builddependency in SciPy-bundle? 
# tmporarily mv to dependencies (TODO: mv back) ('pytest-xdist', '3.6.1'), From 849f9fb47b011f69033ad5cf49b2d084b3ed2439 Mon Sep 17 00:00:00 2001 From: thoffman Date: Fri, 20 Dec 2024 13:23:11 +0100 Subject: [PATCH 15/32] test v0.4.34 with pybind11/2.12.0 builddep --- .../jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb new file mode 100644 index 00000000000..8fac152e93f --- /dev/null +++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb @@ -0,0 +1,168 @@ +# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild +# Author: Denis Kristak +# Updated by: Alex Domingo (Vrije Universiteit Brussel) +# Updated by: Pavel Tománek (INUITS) +# Updated by: Thomas Hoffmann (EMBL Heidelberg) +easyblock = 'PythonBundle' + +name = 'jax' +version = '0.4.34' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://jax.readthedocs.io/' +description = """Composable transformations of Python+NumPy programs: +differentiate, vectorize, JIT to GPU/TPU, and more""" + +toolchain = {'name': 'gfbf', 'version': '2024a'} +cuda_compute_capabilities = ["5.0", "6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "9.0"] + +builddependencies = [ + # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: + # Error in fail: interpreter_path must be an absolute path + # Bazel 6.5.0 (download) works. + ('pybind11', '2.12.0'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. + # Fix: change to builddependency in SciPy-bundle? 
+ # tmporarily mv to dependencies (TODO: mv back) + ('pytest-xdist', '3.6.1'), + ('git', '2.45.1'), # bazel uses git to fetch repositories + ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py + ('poetry', '1.8.3'), + ('Clang', '18.1.8') +] + +dependencies = [ + ('CUDA', '12.6.0', '', SYSTEM), # 12.6.2 ? + ('cuDNN', '9.5.0.50', versionsuffix, SYSTEM), + ('NCCL', '2.22.3', versionsuffix), + ('Python', '3.12.3'), + ('SciPy-bundle', '2024.05'), # 2024.11 ? + ('absl-py', '2.1.0'), + ('flatbuffers-python', '24.3.25'), + ('ml_dtypes', '0.5.0'), + ('zlib', '1.3.1'), + # ('pybind11', '2.13.6'), # override 2.12.0. SciPy-bundle has pybind/2.12.0. Fix: + # change to builddependency in SciPy-bundle? (TODO) +] + +# downloading xla and other tarballs to avoid that Bazel downloads it during the build +local_extract_cmd = 'mkdir -p %(builddir)s/archives && cp %s %(builddir)s/archives' +# note: following commits *must* be the exact same onces used upstream +# XLA_COMMIT from jax-jaxlib: third_party/xla/workspace.bzl +local_xla_commit = 'cd6e808c59f53b40a99df1f1b860db9a3e598bff' +# TFRT_COMMIT from xla: third_party/tsl/third_party/tf_runtime/workspace.bzl +local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25' # TODO: still required? +# TODO: add other downloads + +# Use sources downloaded by EasyBuild +_jaxlib_buildopts = '--bazel_options="--distdir=%(builddir)s/archives" ' +# Use dependencies from EasyBuild +_jaxlib_buildopts += '--bazel_options="--action_env=TF_SYSTEM_LIBS=pybind11" ' +_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include:$EBROOTCUDA/extras/CUPTI/include" ' +# Avoid warning (treated as error) in upb/table.c +_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" ' # TODO: still required? +# _jaxlib_buildopts += '--nouse_clang ' #TODO: avoid clang (?) 
+_jaxlib_buildopts += '--cuda_version=%(cudaver)s ' +_jaxlib_buildopts += '--python_bin_path=$EBROOTPYTHON/bin/python3 ' +# Do not use hermetic CUDA/cuDNN/NCCL: (requires action_env=CPATH=$EBROOTCUDA/extras/CUPTI/include"; +# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include ): +_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDNN_PATH="$EBROOTCUDNN" """ +_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" """ +_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """ +_jaxlib_buildopts += """--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/CUPTI/include" """ + +# get rid of .devDate versionsuffix: TODO: find a better way +# _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """ does not work (?) +_no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """ +_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """ # required? 
+ +components = [ + ('jaxlib', version, { + 'sources': [ + { + 'source_urls': ['https://github.com/google/jax/archive/'], + 'filename': 'jax-v%(version)s.tar.gz', + }, + { + 'source_urls': ['https://github.com/openxla/xla/archive'], + 'download_filename': '%s.tar.gz' % local_xla_commit, + 'filename': 'xla-%s.tar.gz' % local_xla_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + { + 'source_urls': ['https://github.com/tensorflow/runtime/archive'], + 'download_filename': '%s.tar.gz' % local_tfrt_commit, + 'filename': 'tf_runtime-%s.tar.gz' % local_tfrt_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + ], + 'patches': [ + 'jax-0.4.35_easyblock_compat.patch', + 'jax-0.4.35_fix-pybind11-systemlib_cupti.patch', + 'jax-0.4.35_version.patch', + ], + 'checksums': [ + {'jax-v0.4.34.tar.gz': + 'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'}, + {'xla-cd6e808c.tar.gz': + '65cb6d63ef4083b35775052636cb9c629f86db6947c8b91711923ba31dbdcde8'}, + {'tf_runtime-0aeefb16.tar.gz': + 'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'}, + {'jax-0.4.35_easyblock_compat.patch': + 'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'}, + {'jax-0.4.35_fix-pybind11-systemlib_cupti.patch': + '78efe6b5108a5da1935258286c94dea8438fd03651533c34023eeba27f514130'}, + {'jax-0.4.35_version.patch': + 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, + ], + 'start_dir': 'jax-jax-v%(version)s', + 'buildopts': _jaxlib_buildopts, + 'prebuildopts': ' mkdir third_party/gpus/cuda/extras/ -p && ' + + 'ln -s $EBROOTCUDA/extras/CUPTI third_party/gpus/cuda/extras --relative &&' + + _no_devtag + }), +] + +# Some tests require an isolated run: TODO: still required? 
+local_isolated_tests = [ + 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_scan_custom_jvp', + 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_transforms_doc', + 'tests/lax_scipy_special_functions_test.py::LaxScipySpcialFunctionsTest' + + '::testScipySpecialFun_gammainc_s_2x1x4_float32_float32', +] +# deliberately not testing in parallel, as that results in (additional) failing tests; +# use XLA_PYTHON_CLIENT_ALLOCATOR=platform to allocate and deallocate GPU memory during testing, +# see https://github.com/google/jax/issues/7323 and +# https://github.com/google/jax/blob/main/docs/gpu_memory_allocation.rst; +# use CUDA_VISIBLE_DEVICES=0 to avoid failing tests on systems with multiple GPUs; +# use NVIDIA_TF32_OVERRIDE=0 to avoid loosing numerical precision by disabling TF32 Tensor Cores; +local_test_exports = [ + "NVIDIA_TF32_OVERRIDE=0", + "CUDA_VISIBLE_DEVICES=0", + "XLA_PYTHON_CLIENT_ALLOCATOR=platform", + "JAX_ENABLE_X64=true", +] +local_test = ''.join(['export %s;' % x for x in local_test_exports]) +# run all tests at once except for local_isolated_tests: +local_test += "pytest -vv tests %s && " % ' '.join(['--deselect %s' % x for x in local_isolated_tests]) +# run remaining local_isolated_tests separately: +local_test += ' && '.join(['pytest -vv %s' % x for x in local_isolated_tests]) + +use_pip = True + +exts_list = [ + (name, version, { + 'patches': ['jax-0.4.35_version.patch'], + 'preinstallopts': _no_devtag, + 'runtest': False, + 'source_tmpl': '%(name)s-v%(version)s.tar.gz', + 'source_urls': ['https://github.com/google/jax/archive/'], + 'checksums': [ + {'jax-v0.4.34.tar.gz': 'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'}, + {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, + ], + }), +] + +sanity_pip_check = True + +moduleclass = 'ai' From b0afceb366736da7112803b2aeafe0ec704a3bed Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann 
<81254262+ThomasHoffmann77@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:29:13 +0100 Subject: [PATCH 16/32] Delete easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb --- .../SciPy-bundle-2024.05-gfbf-2024a.eb | 106 ------------------ 1 file changed, 106 deletions(-) delete mode 100644 easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb diff --git a/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb b/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb deleted file mode 100644 index b15452f6bb5..00000000000 --- a/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb +++ /dev/null @@ -1,106 +0,0 @@ -easyblock = 'PythonBundle' - -name = 'SciPy-bundle' -version = '2024.05' - -homepage = 'https://python.org/' -description = "Bundle of Python packages for scientific software" - -toolchain = {'name': 'gfbf', 'version': '2024a'} -toolchainopts = {'pic': True, 'lowopt': True} - -builddependencies = [ - ('hypothesis', '6.103.1'), - ('UnZip', '6.0'), - # scipy >= 1.9.0 uses Meson/Ninja - ('Meson', '1.4.0'), - ('meson-python', '0.16.0'), - ('Ninja', '1.12.1'), - ('pkgconf', '2.2.0'), # required by scipy - ('Cython', '3.0.10'), # required by numpy and scipy - ('pybind11', '2.12.0'), # required by scipy -] - -dependencies = [ - ('Python', '3.12.3'), - ('Python-bundle-PyPI', '2024.06'), -] - -use_pip = True - -# order is important! 
-exts_list = [ - ('numpy', '1.26.4', { - 'patches': [ - 'numpy-1.22.3_disable-broken-override-test.patch', - 'numpy-1.26.4_fix-riscv64-test-failures.patch', - ], - 'checksums': [ - {'numpy-1.26.4.tar.gz': '2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010'}, - {'numpy-1.22.3_disable-broken-override-test.patch': - '9c589bb073b28b25ff45eb3c63c57966aa508dd8b318d0b885b6295271e4983c'}, - {'numpy-1.26.4_fix-riscv64-test-failures.patch': - '81bd487dbca6da8285971a16a2c7b488718a051d3cd66450277bed6ff21741de'}, - ], - }), - ('ply', '3.11', { - 'checksums': ['00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3'], - }), - ('gast', '0.5.4', { - 'checksums': ['9c270fe5f4b130969b54174de7db4e764b09b4f7f67ccfc32480e29f78348d97'], - }), - ('beniget', '0.4.1', { - 'checksums': ['75554b3b8ad0553ce2f607627dad3d95c60c441189875b98e097528f8e23ac0c'], - }), - ('pythran', '0.16.1', { - 'checksums': ['861748c0f9c7d422b32724b114b3817d818ed4eab86c09781aa0a3f7ceabb7f9'], - }), - ('versioneer', '0.29', { - 'checksums': ['5ab283b9857211d61b53318b7c792cf68e798e765ee17c27ade9f6c924235731'], - }), - ('scipy', '1.13.1', { - 'enable_slow_tests': True, - 'ignore_test_result': False, - 'patches': [ - 'scipy-1.11.1_disable-tests.patch', - 'scipy-1.11.1_xfail-aarch64_test_maxiter_worsening.patch', - 'scipy-1.13.1_TestLinprogIPSparse.patch', - ], - 'checksums': [ - {'scipy-1.13.1.tar.gz': '095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c'}, - {'scipy-1.11.1_disable-tests.patch': '906bfb03397d94882ccdc1b93bc2c8e854e0e060c2d107c83042992394e6a4af'}, - {'scipy-1.11.1_xfail-aarch64_test_maxiter_worsening.patch': - '918c8e6fa8215d459126f267764c961bde729ea4a116c7f6287cddfdc58ffcea'}, - {'scipy-1.13.1_TestLinprogIPSparse.patch': - '7213c2690b76c69f7e7103529cea3fa2098c05fbea556f04325fab9ca8c065f5'}, - ], - }), - ('numexpr', '2.10.0', { - 'patches': ['numexpr-2.10.0_fix-numpy-1.x.patch'], - 'checksums': [ - {'numexpr-2.10.0.tar.gz': 
'c89e930752639df040539160326d8f99a84159bbea41943ab8e960591edaaef0'}, - {'numexpr-2.10.0_fix-numpy-1.x.patch': '8d70b2e95579e6f0adc07bc615144f7657b3b607f9210ec328b6622458ca726d'}, - ], - }), - ('Bottleneck', '1.3.8', { - 'checksums': ['6780d896969ba7f53c8995ba90c87c548beb3db435dc90c60b9a10ed1ab4d868'], - }), - ('tzdata', '2024.1', { - 'checksums': ['2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd'], - }), - ('pandas', '2.2.2', { - 'preinstallopts': "export PANDAS_CI=0 && ", - 'checksums': ['9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54'], - }), - ('mpmath', '1.3.0', { - 'checksums': ['7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f'], - }), - ('deap', '1.4.1', { - 'modulename': 'deap.base', - 'checksums': ['cc01de9892dfa7d1bc9803dab28892fead177f0182c81db47360a240ead778ff'], - }), -] - -sanity_pip_check = True - -moduleclass = 'lang' From 2e75e9f774cdc0ce73df3c879f263bf4b7e39a57 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:29:35 +0100 Subject: [PATCH 17/32] Delete easybuild/easyconfigs/p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb --- .../p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb | 33 ------------------- 1 file changed, 33 deletions(-) delete mode 100644 easybuild/easyconfigs/p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb diff --git a/easybuild/easyconfigs/p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb b/easybuild/easyconfigs/p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb deleted file mode 100644 index 2e698fa2bb7..00000000000 --- a/easybuild/easyconfigs/p/pybind11/pybind11-2.13.6-GCC-13.3.0.eb +++ /dev/null @@ -1,33 +0,0 @@ -name = 'pybind11' -version = '2.13.6' - -homepage = 'https://pybind11.readthedocs.io' -description = """pybind11 is a lightweight header-only library that exposes C++ types in Python and vice versa, - mainly to create Python bindings of existing C++ code.""" - -toolchain = {'name': 'GCC', 'version': '13.3.0'} - -source_urls = 
['https://github.com/pybind/pybind11/archive/'] -sources = ['v%(version)s.tar.gz'] -patches = [ - 'pybind11-2.10.3_require-catch.patch', -] -checksums = [ - {'v2.13.6.tar.gz': 'e08cb87f4773da97fa7b5f035de8763abc656d87d5773e62f6da0587d1f0ec20'}, - {'pybind11-2.10.3_require-catch.patch': '4a27ba3ef1d5c535d120d6178a6e876ae678e4899a07500aab37908357b0b60b'}, -] - -builddependencies = [ - ('CMake', '3.29.3'), - # Test dependencies - ('Eigen', '3.4.0'), - ('Catch2', '2.13.10'), - ('Python-bundle-PyPI', '2024.06'), # to provide pytest -] - -dependencies = [ - ('Boost', '1.85.0'), - ('Python', '3.12.3'), -] - -moduleclass = 'lib' From b9887766ce5e4a71cbd35e16c46ef552e32f3544 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:29:55 +0100 Subject: [PATCH 18/32] Delete easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb --- .../jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 170 ------------------ 1 file changed, 170 deletions(-) delete mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb deleted file mode 100644 index 63f290a028c..00000000000 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ /dev/null @@ -1,170 +0,0 @@ -# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild -# Author: Denis Kristak -# Updated by: Alex Domingo (Vrije Universiteit Brussel) -# Updated by: Pavel Tománek (INUITS) -# Updated by: Thomas Hoffmann (EMBL Heidelberg) -easyblock = 'PythonBundle' - -name = 'jax' -version = '0.4.35' -versionsuffix = '-CUDA-%(cudaver)s' - -homepage = 'https://jax.readthedocs.io/' -description = """Composable transformations of Python+NumPy programs: -differentiate, vectorize, JIT to GPU/TPU, and more""" - -toolchain = {'name': 'gfbf', 'version': '2024a'} 
-cuda_compute_capabilities = ["5.0", "6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "9.0"] - -builddependencies = [ - # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: - # Error in fail: interpreter_path must be an absolute path - # Bazel 6.5.0 (download) works. - ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. - # Fix: change to builddependency in SciPy-bundle? - # tmporarily mv to dependencies (TODO: mv back) - ('pytest-xdist', '3.6.1'), - ('git', '2.45.1'), # bazel uses git to fetch repositories - ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py - ('poetry', '1.8.3'), - ('Clang', '18.1.8') -] - -dependencies = [ - ('CUDA', '12.6.0', '', SYSTEM), # 12.6.2 ? - ('cuDNN', '9.5.0.50', versionsuffix, SYSTEM), - ('NCCL', '2.22.3', versionsuffix), - ('Python', '3.12.3'), - ('SciPy-bundle', '2024.05'), # 2024.11 ? - ('absl-py', '2.1.0'), - ('flatbuffers-python', '24.3.25'), - ('ml_dtypes', '0.5.0'), - ('zlib', '1.3.1'), - # ('pybind11', '2.13.6'), # override 2.12.0. SciPy-bundle has pybind/2.12.0. Fix: - # change to builddependency in SciPy-bundle? (TODO) -] - -# downloading xla and other tarballs to avoid that Bazel downloads it during the build -local_extract_cmd = 'mkdir -p %(builddir)s/archives && cp %s %(builddir)s/archives' -# note: following commits *must* be the exact same onces used upstream -# XLA_COMMIT from jax-jaxlib: third_party/xla/workspace.bzl -local_xla_commit = '76da730179313b3bebad6dea6861768421b7358c' -# TFRT_COMMIT from xla: third_party/tsl/third_party/tf_runtime/workspace.bzl -local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25' # TODO: still required? 
-# TODO: add other downloads - -# Use sources downloaded by EasyBuild -_jaxlib_buildopts = '--bazel_options="--distdir=%(builddir)s/archives" ' -# Use dependencies from EasyBuild -_jaxlib_buildopts += '--bazel_options="--action_env=TF_SYSTEM_LIBS=pybind11" ' -_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include:$EBROOTCUDA/extras/CUPTI/include" ' -# Avoid warning (treated as error) in upb/table.c -_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" ' # TODO: still required? -# _jaxlib_buildopts += '--nouse_clang ' #TODO: avoid clang (?) -_jaxlib_buildopts += '--cuda_version=%(cudaver)s ' -_jaxlib_buildopts += '--python_bin_path=$EBROOTPYTHON/bin/python3 ' -# Do not use hermetic CUDA/cuDNN/NCCL: (requires action_env=CPATH=$EBROOTCUDA/extras/CUPTI/include"; -# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include ): -_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDNN_PATH="$EBROOTCUDNN" """ -_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" """ -_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """ -_jaxlib_buildopts += """--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/CUPTI/include" """ - -# get rid of .devDate versionsuffix: TODO: find a better way -# _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """ does not work (?) -_no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """ -_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """ # required? 
- -components = [ - ('jaxlib', version, { - 'sources': [ - { - 'source_urls': ['https://github.com/google/jax/archive/'], - 'filename': 'jax-v%(version)s.tar.gz', - }, - { - 'source_urls': ['https://github.com/openxla/xla/archive'], - 'download_filename': '%s.tar.gz' % local_xla_commit, - 'filename': 'xla-%s.tar.gz' % local_xla_commit[:8], - 'extract_cmd': local_extract_cmd, - }, - { - 'source_urls': ['https://github.com/tensorflow/runtime/archive'], - 'download_filename': '%s.tar.gz' % local_tfrt_commit, - 'filename': 'tf_runtime-%s.tar.gz' % local_tfrt_commit[:8], - 'extract_cmd': local_extract_cmd, - }, - ], - 'patches': [ - 'jax-0.4.35_easyblock_compat.patch', - 'jax-0.4.35_fix-pybind11-systemlib_cupti.patch', - 'jax-0.4.35_version.patch', - ], - 'checksums': [ - {'jax-v0.4.35.tar.gz': - '65e086708ae56670676b7b2340ad82b901d8c9993d1241a839c8990bdb8d6212'}, - {'xla-76da7301.tar.gz': - 'd67ced09b69ab8d7b26fa4cd5f48b22db57eb330294a35f6e1d462ee17066757'}, - {'tf_runtime-0aeefb16.tar.gz': - 'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'}, - {'jax-0.4.35_easyblock_compat.patch': - 'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'}, - {'jax-0.4.35_fix-pybind11-systemlib_cupti.patch': - '78efe6b5108a5da1935258286c94dea8438fd03651533c34023eeba27f514130'}, - {'jax-0.4.35_version.patch': - 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, - ], - 'start_dir': 'jax-jax-v%(version)s', - 'buildopts': _jaxlib_buildopts, - 'prebuildopts': ' mkdir third_party/gpus/cuda/extras/ -p && ' + - 'ln -s $EBROOTCUDA/extras/CUPTI third_party/gpus/cuda/extras --relative &&' + - _no_devtag - }), -] - -# Some tests require an isolated run: TODO: still required? 
-local_isolated_tests = [ - 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_scan_custom_jvp', - 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_transforms_doc', - 'tests/lax_scipy_special_functions_test.py::LaxScipySpcialFunctionsTest' + - '::testScipySpecialFun_gammainc_s_2x1x4_float32_float32', -] -# deliberately not testing in parallel, as that results in (additional) failing tests; -# use XLA_PYTHON_CLIENT_ALLOCATOR=platform to allocate and deallocate GPU memory during testing, -# see https://github.com/google/jax/issues/7323 and -# https://github.com/google/jax/blob/main/docs/gpu_memory_allocation.rst; -# use CUDA_VISIBLE_DEVICES=0 to avoid failing tests on systems with multiple GPUs; -# use NVIDIA_TF32_OVERRIDE=0 to avoid loosing numerical precision by disabling TF32 Tensor Cores; -local_test_exports = [ - "NVIDIA_TF32_OVERRIDE=0", - "CUDA_VISIBLE_DEVICES=0", - "XLA_PYTHON_CLIENT_ALLOCATOR=platform", - "JAX_ENABLE_X64=true", -] -local_test = ''.join(['export %s;' % x for x in local_test_exports]) -# run all tests at once except for local_isolated_tests: -local_test += "pytest -vv tests %s && " % ' '.join(['--deselect %s' % x for x in local_isolated_tests]) -# run remaining local_isolated_tests separately: -local_test += ' && '.join(['pytest -vv %s' % x for x in local_isolated_tests]) - -use_pip = True - -exts_list = [ - (name, version, { - 'source_tmpl': '%(name)s-v%(version)s.tar.gz', - 'source_urls': ['https://github.com/google/jax/archive/'], - # 'patches': ['jax-0.4.25_fix_env_test_no_log_spam.patch'], # TODO: still required? update? 
- 'patches': ['jax-0.4.35_version.patch'], - 'checksums': [ - {'jax-v0.4.35.tar.gz': '65e086708ae56670676b7b2340ad82b901d8c9993d1241a839c8990bdb8d6212'}, - {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, - ], - # 'runtest': local_test, - 'runtest': False, # tmp - 'preinstallopts': _no_devtag - }), -] - -sanity_pip_check = True - -moduleclass = 'ai' From 6db6d36c5fa4f9b09b42cef3c2fe471c59bf6bad Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:30:39 +0100 Subject: [PATCH 19/32] Update jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb --- .../easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb index 8fac152e93f..45312147cd6 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb @@ -20,9 +20,7 @@ builddependencies = [ # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: # Error in fail: interpreter_path must be an absolute path # Bazel 6.5.0 (download) works. - ('pybind11', '2.12.0'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. - # Fix: change to builddependency in SciPy-bundle? - # tmporarily mv to dependencies (TODO: mv back) + ('pybind11', '2.12.0'), ('pytest-xdist', '3.6.1'), ('git', '2.45.1'), # bazel uses git to fetch repositories ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py @@ -40,8 +38,6 @@ dependencies = [ ('flatbuffers-python', '24.3.25'), ('ml_dtypes', '0.5.0'), ('zlib', '1.3.1'), - # ('pybind11', '2.13.6'), # override 2.12.0. SciPy-bundle has pybind/2.12.0. Fix: - # change to builddependency in SciPy-bundle? 
(TODO) ] # downloading xla and other tarballs to avoid that Bazel downloads it during the build From f3fc230cdcb360f9b13348836b9141e6a8a02d55 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:11:19 +0100 Subject: [PATCH 20/32] revert SciPy-bundle-2024.05-gfbf-2024a.eb --- .../SciPy-bundle-2024.05-gfbf-2024a.eb | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb diff --git a/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb b/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb new file mode 100644 index 00000000000..5584d888f30 --- /dev/null +++ b/easybuild/easyconfigs/s/SciPy-bundle/SciPy-bundle-2024.05-gfbf-2024a.eb @@ -0,0 +1,106 @@ +easyblock = 'PythonBundle' + +name = 'SciPy-bundle' +version = '2024.05' + +homepage = 'https://python.org/' +description = "Bundle of Python packages for scientific software" + +toolchain = {'name': 'gfbf', 'version': '2024a'} +toolchainopts = {'pic': True, 'lowopt': True} + +builddependencies = [ + ('hypothesis', '6.103.1'), + ('UnZip', '6.0'), + # scipy >= 1.9.0 uses Meson/Ninja + ('Meson', '1.4.0'), + ('meson-python', '0.16.0'), + ('Ninja', '1.12.1'), + ('pkgconf', '2.2.0'), # required by scipy + ('Cython', '3.0.10'), # required by numpy and scipy +] + +dependencies = [ + ('Python', '3.12.3'), + ('Python-bundle-PyPI', '2024.06'), + ('pybind11', '2.12.0'), # required by scipy +] + +use_pip = True + +# order is important! 
+exts_list = [ + ('numpy', '1.26.4', { + 'patches': [ + 'numpy-1.22.3_disable-broken-override-test.patch', + 'numpy-1.26.4_fix-riscv64-test-failures.patch', + ], + 'checksums': [ + {'numpy-1.26.4.tar.gz': '2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010'}, + {'numpy-1.22.3_disable-broken-override-test.patch': + '9c589bb073b28b25ff45eb3c63c57966aa508dd8b318d0b885b6295271e4983c'}, + {'numpy-1.26.4_fix-riscv64-test-failures.patch': + '81bd487dbca6da8285971a16a2c7b488718a051d3cd66450277bed6ff21741de'}, + ], + }), + ('ply', '3.11', { + 'checksums': ['00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3'], + }), + ('gast', '0.5.4', { + 'checksums': ['9c270fe5f4b130969b54174de7db4e764b09b4f7f67ccfc32480e29f78348d97'], + }), + ('beniget', '0.4.1', { + 'checksums': ['75554b3b8ad0553ce2f607627dad3d95c60c441189875b98e097528f8e23ac0c'], + }), + ('pythran', '0.16.1', { + 'checksums': ['861748c0f9c7d422b32724b114b3817d818ed4eab86c09781aa0a3f7ceabb7f9'], + }), + ('versioneer', '0.29', { + 'checksums': ['5ab283b9857211d61b53318b7c792cf68e798e765ee17c27ade9f6c924235731'], + }), + ('scipy', '1.13.1', { + 'enable_slow_tests': True, + 'ignore_test_result': False, + 'patches': [ + 'scipy-1.11.1_disable-tests.patch', + 'scipy-1.11.1_xfail-aarch64_test_maxiter_worsening.patch', + 'scipy-1.13.1_TestLinprogIPSparse.patch', + ], + 'checksums': [ + {'scipy-1.13.1.tar.gz': '095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c'}, + {'scipy-1.11.1_disable-tests.patch': '906bfb03397d94882ccdc1b93bc2c8e854e0e060c2d107c83042992394e6a4af'}, + {'scipy-1.11.1_xfail-aarch64_test_maxiter_worsening.patch': + '918c8e6fa8215d459126f267764c961bde729ea4a116c7f6287cddfdc58ffcea'}, + {'scipy-1.13.1_TestLinprogIPSparse.patch': + '7213c2690b76c69f7e7103529cea3fa2098c05fbea556f04325fab9ca8c065f5'}, + ], + }), + ('numexpr', '2.10.0', { + 'patches': ['numexpr-2.10.0_fix-numpy-1.x.patch'], + 'checksums': [ + {'numexpr-2.10.0.tar.gz': 
'c89e930752639df040539160326d8f99a84159bbea41943ab8e960591edaaef0'}, + {'numexpr-2.10.0_fix-numpy-1.x.patch': '8d70b2e95579e6f0adc07bc615144f7657b3b607f9210ec328b6622458ca726d'}, + ], + }), + ('Bottleneck', '1.3.8', { + 'checksums': ['6780d896969ba7f53c8995ba90c87c548beb3db435dc90c60b9a10ed1ab4d868'], + }), + ('tzdata', '2024.1', { + 'checksums': ['2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd'], + }), + ('pandas', '2.2.2', { + 'preinstallopts': "export PANDAS_CI=0 && ", + 'checksums': ['9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54'], + }), + ('mpmath', '1.3.0', { + 'checksums': ['7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f'], + }), + ('deap', '1.4.1', { + 'modulename': 'deap.base', + 'checksums': ['cc01de9892dfa7d1bc9803dab28892fead177f0182c81db47360a240ead778ff'], + }), +] + +sanity_pip_check = True + +moduleclass = 'lang' From 45153cfd1d11cdccaaeb8fd8192001b245fe4a44 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Thu, 16 Jan 2025 16:03:31 +0100 Subject: [PATCH 21/32] Update jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb add versionsuffix to Clang dependency --- .../easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb index 45312147cd6..db8683541ac 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb @@ -25,7 +25,7 @@ builddependencies = [ ('git', '2.45.1'), # bazel uses git to fetch repositories ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py ('poetry', '1.8.3'), - ('Clang', '18.1.8') + ('Clang', '18.1.8', versionsuffix) ] dependencies = [ From 259590c3d3e3dac0bc7c5d3917c4b1383add7485 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann 
<81254262+ThomasHoffmann77@users.noreply.github.com> Date: Thu, 16 Jan 2025 17:17:13 +0100 Subject: [PATCH 22/32] Update jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb update builddep pybind11 to v2.13.6 --- .../easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb index db8683541ac..8e35680c172 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb @@ -20,7 +20,7 @@ builddependencies = [ # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: # Error in fail: interpreter_path must be an absolute path # Bazel 6.5.0 (download) works. - ('pybind11', '2.12.0'), + ('pybind11', '2.13.6'), ('pytest-xdist', '3.6.1'), ('git', '2.45.1'), # bazel uses git to fetch repositories ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py From ef36815b288bd37e0ef6b599ab0e412dd10b0f04 Mon Sep 17 00:00:00 2001 From: thoffman Date: Fri, 17 Jan 2025 16:34:42 +0100 Subject: [PATCH 23/32] also build jax_cuda12_plugin and jax_cuda12_pjrt --- .../jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb | 45 +++++++++- ...-0.4.35_fix-pybind11-systemlib_cupti.patch | 84 ++++++++++++++++++- 2 files changed, 123 insertions(+), 6 deletions(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb index 8e35680c172..3992c90a0b6 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb @@ -66,6 +66,12 @@ _jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" _jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """ _jaxlib_buildopts += 
"""--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/CUPTI/include" """ +_plugins_buildopts = """--enable_cuda """ +_plugins_buildopts += """--build_gpu_plugin """ +# _plugins_buildopts +="""--gpu_plugin_cuda_version=12 """ +_plugins_buildopts += """--build_gpu_pjrt_plugin """ +_plugins_buildopts += """--build_gpu_kernel_plugin=cuda """ + # get rid of .devDate versionsuffix: TODO: find a better way # _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """ does not work (?) _no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """ @@ -106,7 +112,7 @@ components = [ {'jax-0.4.35_easyblock_compat.patch': 'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'}, {'jax-0.4.35_fix-pybind11-systemlib_cupti.patch': - '78efe6b5108a5da1935258286c94dea8438fd03651533c34023eeba27f514130'}, + '51369589193be60dc94ec2de1b35d0a9268288578903fb05d41b6d1a8c9df460'}, {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, ], @@ -116,6 +122,39 @@ components = [ 'ln -s $EBROOTCUDA/extras/CUPTI third_party/gpus/cuda/extras --relative &&' + _no_devtag }), + # build jaxlib first and then plugins in 2nd interation: + ('jaxlib', version, { + 'sources': [ + { + 'source_urls': ['https://github.com/google/jax/archive/'], + 'filename': 'jax-v%(version)s.tar.gz', + }, + { + 'source_urls': ['https://github.com/openxla/xla/archive'], + 'download_filename': '%s.tar.gz' % local_xla_commit, + 'filename': 'xla-%s.tar.gz' % local_xla_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + { + 'source_urls': ['https://github.com/tensorflow/runtime/archive'], + 'download_filename': '%s.tar.gz' % local_tfrt_commit, + 'filename': 'tf_runtime-%s.tar.gz' % local_tfrt_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + ], + 'checksums': [ + {'jax-v0.4.34.tar.gz': + 'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'}, + {'xla-cd6e808c.tar.gz': + 
'65cb6d63ef4083b35775052636cb9c629f86db6947c8b91711923ba31dbdcde8'}, + {'tf_runtime-0aeefb16.tar.gz': + 'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'}, + ], + 'start_dir': 'jax-jax-v%(version)s', + 'buildopts': _jaxlib_buildopts + _plugins_buildopts, + 'prebuildopts': _no_devtag + }), + ] # Some tests require an isolated run: TODO: still required? @@ -158,7 +197,9 @@ exts_list = [ ], }), ] - +sanity_check_commands = [ + 'python -c "import jax_cuda12_plugin"' +] sanity_pip_check = True moduleclass = 'ai' diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch b/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch index 265f9d48636..5da275fb960 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch @@ -51,11 +51,87 @@ diff -ruN jax-jax-v0.4.35/third_party/xla/workspace.bzl jax-jax-v0.4.35_jaxlib_c # For development, one often wants to make changes to the TF repository as well diff -ruN jax-jax-v0.4.35/third_party/xla/xla-76da73_cupti.patch jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/xla-76da73_cupti.patch --- jax-jax-v0.4.35/third_party/xla/xla-76da73_cupti.patch 1970-01-01 01:00:00.000000000 +0100 -+++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/xla-76da73_cupti.patch 2024-11-27 12:18:26.668582799 +0100 -@@ -0,0 +1,12 @@ -+diff -ru xla-76da730179313b3bebad6dea6861768421b7358c/xla/tsl/cuda/cupti_stub.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti/xla/tsl/cuda/cupti_stub.cc ++++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/xla-76da73_cupti.patch 2025-01-17 15:44:11.545694652 +0100 +@@ -0,0 +1,88 @@ ++Binary files xla-76da730179313b3bebad6dea6861768421b7358c/third_party/tsl/third_party/gpus/cuda/hermetic/.cuda_configure.bzl.swp and 
xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/third_party/tsl/third_party/gpus/cuda/hermetic/.cuda_configure.bzl.swp differ ++Binary files xla-76da730179313b3bebad6dea6861768421b7358c/third_party/tsl/third_party/gpus/.find_cuda_config.py.swp and xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/third_party/tsl/third_party/gpus/.find_cuda_config.py.swp differ ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_collector.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_collector.cc ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_collector.cc 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_collector.cc 2025-01-17 10:22:48.947856740 +0100 ++@@ -24,8 +24,8 @@ ++ #include "absl/hash/hash.h" ++ #include "absl/strings/str_cat.h" ++ #include "absl/strings/str_join.h" ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti_activity.h" +++#include +++#include ++ #include "third_party/gpus/cuda/include/cuda.h" ++ #include "third_party/gpus/cuda/include/cuda_occupancy.h" ++ #include "xla/tsl/profiler/utils/parse_annotation.h" ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_interface.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_interface.h ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_interface.h 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_interface.h 2025-01-17 10:22:48.947856740 +0100 ++@@ -19,7 +19,7 @@ ++ #include ++ #include ++ ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include ++ #include "third_party/gpus/cuda/include/cuda.h" ++ #include 
"tsl/platform/macros.h" ++ #include "tsl/platform/types.h" ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_profiler.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_profiler.h ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_profiler.h 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_profiler.h 2025-01-17 10:22:48.947856740 +0100 ++@@ -18,7 +18,7 @@ ++ #include ++ #include ++ ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include ++ #include "xla/backends/profiler/gpu/cupti_interface.h" ++ #include "tsl/platform/types.h" ++ ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.cc ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.cc 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.cc 2025-01-17 14:50:00.284134999 +0100 ++@@ -24,7 +24,7 @@ ++ #include "absl/cleanup/cleanup.h" ++ #include "absl/container/flat_hash_set.h" ++ #include "absl/types/span.h" ++-#include "third_party/gpus/cuda/extras/CUPTI/include/generated_nvtx_meta.h" +++#include ++ #include "third_party/gpus/cuda/include/cuda.h" ++ #include "xla/backends/profiler/gpu/cupti_buffer_events.h" ++ #include "xla/backends/profiler/gpu/cupti_collector.h" ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.h ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.h 2024-10-21 20:29:31.000000000 +0200 +++++ 
xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.h 2025-01-17 10:22:48.948856765 +0100 ++@@ -22,7 +22,7 @@ ++ ++ #include "absl/status/status.h" ++ #include "absl/types/optional.h" ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include ++ #include "third_party/gpus/cuda/include/nvtx3/nvToolsExt.h" ++ #include "xla/backends/profiler/gpu/cupti_collector.h" ++ #include "xla/backends/profiler/gpu/cupti_interface.h" ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_wrapper.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_wrapper.h ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_wrapper.h 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_wrapper.h 2025-01-17 10:22:48.948856765 +0100 ++@@ -19,7 +19,7 @@ ++ #include ++ #include ++ ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include ++ #include "third_party/gpus/cuda/include/cuda.h" ++ #include "xla/backends/profiler/gpu/cupti_interface.h" ++ ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/tsl/cuda/cupti_stub.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/tsl/cuda/cupti_stub.cc +--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/tsl/cuda/cupti_stub.cc 2024-10-21 20:29:31.000000000 +0200 -++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti/xla/tsl/cuda/cupti_stub.cc 2024-11-26 12:04:11.695539146 +0100 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/tsl/cuda/cupti_stub.cc 2025-01-17 10:22:48.948856765 +0100 +@@ -13,7 +13,7 @@ + limitations under the License. 
+ ==============================================================================*/ From 26adb0ec1ad09ca1d11e1de4a595dfc3b97ef241 Mon Sep 17 00:00:00 2001 From: thoffman Date: Wed, 22 Jan 2025 12:16:08 +0100 Subject: [PATCH 24/32] set XLA_FLAGS xla_gpu_cuda_data_dir to $CUDA_HOME --- .../jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb index 3992c90a0b6..2d04c3530a5 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb @@ -156,6 +156,18 @@ components = [ }), ] +#failing: +##tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 FAILED [ 98%] +##tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 FAILED [ 98%] +##tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 FAILED [ 99%] +##tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 FAILED [ 99%] +##FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 - AssertionError: +##FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 - AssertionError: +##FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 - AssertionError: +##FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 - AssertionError: +##tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 FAILED [ 10%] +##FAILED tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 - AssertionError: +# # Some tests require an isolated run: TODO: still required? 
local_isolated_tests = [ @@ -198,8 +210,21 @@ exts_list = [ }), ] sanity_check_commands = [ - 'python -c "import jax_cuda12_plugin"' + """python -c "import jax_cuda"$(echo $EBVERSIONCUDA|awk -F '.' '{print $1}')"_plugin" """ ] sanity_pip_check = True +# TODO: patch to set default XLA_FLAGS +modluafooter = """ +setenv("XLA_FLAGS", "--xla_gpu_cuda_data_dir=" .. os.getenv("CUDA_HOME")); +""" + +modtclfooter = """ +setenv XLA_FLAGS --xla_gpu_cuda_data_dir=$::env(CUDA_HOME) +""" + +#TODO: sanity check paths + + moduleclass = 'ai' + From 3695affdd7ed44c112794d972720950644a708eb Mon Sep 17 00:00:00 2001 From: thoffman Date: Wed, 22 Jan 2025 15:28:54 +0100 Subject: [PATCH 25/32] fix style --- .../jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb index 2d04c3530a5..ba56113debb 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb @@ -156,17 +156,17 @@ components = [ }), ] -#failing: -##tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 FAILED [ 98%] -##tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 FAILED [ 98%] -##tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 FAILED [ 99%] -##tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 FAILED [ 99%] -##FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 - AssertionError: -##FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 - AssertionError: -##FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 - AssertionError: -##FAILED 
tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 - AssertionError: -##tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 FAILED [ 10%] -##FAILED tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 - AssertionError: +# failing: +# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 FAILED [ 98%] +# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 FAILED [ 98%] +# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 FAILED [ 99%] +# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 FAILED [ 99%] +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 - AssertionError: +# tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 FAILED [ 10%] +# FAILED tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 - AssertionError: # # Some tests require an isolated run: TODO: still required? 
@@ -223,8 +223,7 @@ modtclfooter = """ setenv XLA_FLAGS --xla_gpu_cuda_data_dir=$::env(CUDA_HOME) """ -#TODO: sanity check paths +# TODO: sanity check paths moduleclass = 'ai' - From 8fc8f355e9431621aab7b05392625630166bb048 Mon Sep 17 00:00:00 2001 From: thoffman Date: Fri, 24 Jan 2025 14:39:19 +0100 Subject: [PATCH 26/32] add EC for Bazel v6.5.0 --- .../b/Bazel/Bazel-6.5.0-GCCcore-13.3.0.eb | 31 +++++++++++++++++++ ...azel-6.5.0_py3.12_pytest_assertEqual.patch | 31 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 easybuild/easyconfigs/b/Bazel/Bazel-6.5.0-GCCcore-13.3.0.eb create mode 100644 easybuild/easyconfigs/b/Bazel/Bazel-6.5.0_py3.12_pytest_assertEqual.patch diff --git a/easybuild/easyconfigs/b/Bazel/Bazel-6.5.0-GCCcore-13.3.0.eb b/easybuild/easyconfigs/b/Bazel/Bazel-6.5.0-GCCcore-13.3.0.eb new file mode 100644 index 00000000000..c4c77a8a7dc --- /dev/null +++ b/easybuild/easyconfigs/b/Bazel/Bazel-6.5.0-GCCcore-13.3.0.eb @@ -0,0 +1,31 @@ +name = 'Bazel' +version = '6.5.0' + +homepage = 'https://bazel.io/' +description = """Bazel is a build tool that builds code quickly and reliably. +It is used to build the majority of Google's software.""" + +toolchain = {'name': 'GCCcore', 'version': '13.3.0'} + +source_urls = ['https://github.com/bazelbuild/%(namelower)s/releases/download/%(version)s'] +sources = ['%(namelower)s-%(version)s-dist.zip'] +patches = ['Bazel-6.5.0_py3.12_pytest_assertEqual.patch'] +checksums = [ + {'bazel-6.5.0-dist.zip': 'fc89da919415289f29e4ff18a5e01270ece9a6fe83cb60967218bac4a3bb3ed2'}, + {'Bazel-6.5.0_py3.12_pytest_assertEqual.patch': '2670dd5c393970ba20db2c98cf0208df7190ff339ccb66fee9a6d48aaaf3ede6'}, +] + +builddependencies = [ + ('binutils', '2.42'), + ('Python', '3.12.3'), + ('Zip', '3.0'), +] + +dependencies = [ + ('Java', '11.0.20', '', SYSTEM), +] + +runtest = True +testopts = "-- //examples/cpp:hello-success_test //examples/py/... //examples/py_native:test //examples/shell/..." 
+ +moduleclass = 'devel' diff --git a/easybuild/easyconfigs/b/Bazel/Bazel-6.5.0_py3.12_pytest_assertEqual.patch b/easybuild/easyconfigs/b/Bazel/Bazel-6.5.0_py3.12_pytest_assertEqual.patch new file mode 100644 index 00000000000..29b9c10b8b4 --- /dev/null +++ b/easybuild/easyconfigs/b/Bazel/Bazel-6.5.0_py3.12_pytest_assertEqual.patch @@ -0,0 +1,31 @@ +# Thomas Hoffmann, EMBL Heidelberg, structures-it@embl.de, 2025/01 +# replace assertEquals by assertEqual +# https://docs.python.org/3/whatsnew/3.12.html#id3 +diff -ru bazel-6.5.0/examples/py_native/fail.py bazel-6.5.0_pytest_assertEqual/examples/py_native/fail.py +--- bazel-6.5.0/examples/py_native/fail.py 1980-01-01 00:00:00.000000000 +0100 ++++ bazel-6.5.0_pytest_assertEqual/examples/py_native/fail.py 2025-01-24 14:27:22.973336188 +0100 +@@ -6,7 +6,7 @@ + class TestGetNumber(unittest.TestCase): + + def test_fail(self): +- self.assertEquals(GetNumber(), 0) ++ self.assertEqual(GetNumber(), 0) + + + if __name__ == '__main__': +diff -ru bazel-6.5.0/examples/py_native/test.py bazel-6.5.0_pytest_assertEqual/examples/py_native/test.py +--- bazel-6.5.0/examples/py_native/test.py 1980-01-01 00:00:00.000000000 +0100 ++++ bazel-6.5.0_pytest_assertEqual/examples/py_native/test.py 2025-01-24 14:27:22.973336188 +0100 +@@ -8,10 +8,10 @@ + class TestGetNumber(unittest.TestCase): + + def test_ok(self): +- self.assertEquals(GetNumber(), 42) ++ self.assertEqual(GetNumber(), 42) + + def test_fib(self): +- self.assertEquals(Fib(5), 8) ++ self.assertEqual(Fib(5), 8) + + if __name__ == '__main__': + unittest.main() From 96874a4d2ba23bc180971cf26cb5c76293016dd7 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Thu, 6 Mar 2025 13:12:25 +0100 Subject: [PATCH 27/32] Delete easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb --- .../m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb | 56 ------------------- 1 file changed, 56 deletions(-) delete mode 100644 
easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb diff --git a/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb b/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb deleted file mode 100644 index 5a20f1d8458..00000000000 --- a/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb +++ /dev/null @@ -1,56 +0,0 @@ -# Thomas Hoffmann, EMBL Heidelberg, structures-it@embl.de, 2024/11 -easyblock = 'PythonBundle' - -name = 'ml_dtypes' -version = '0.5.0' - -homepage = 'https://github.com/jax-ml/ml_dtypes' -description = """ -ml_dtypes is a stand-alone implementation of several NumPy dtype extensions used -in machine learning libraries, including: - -bfloat16: an alternative to the standard float16 format -float8_*: several experimental 8-bit floating point representations including: -float8_e4m3b11fnuz -float8_e4m3fn -float8_e4m3fnuz -float8_e5m2 -float8_e5m2fnuz -""" - -toolchain = {'name': 'gfbf', 'version': '2024a'} - -builddependencies = [ - ('poetry', '1.8.3'), -] - -dependencies = [ - ('Python', '3.12.3'), - # ('SciPy-bundle', '2024.11'), ? 
- ('SciPy-bundle', '2024.05'), -] - - -use_pip = True - -default_easyblock = 'PythonPackage' - -exts_list = [ - ('opt_einsum', '3.4.0', { - 'checksums': ['96ca72f1b886d148241348783498194c577fa30a8faac108586b14f1ba4473ac'], - }), - ('etils', '1.10.0', { - 'checksums': ['4eaa9d7248fd4eeb75e44d47ca29875a5ccea044cc14a17435794bf8ac116a05'], - }), - (name, version, { - 'patches': [('ml_dtypes-0.3.2_EigenAvx512.patch', 1)], - 'checksums': [ - {'ml_dtypes-0.5.0.tar.gz': '3e7d3a380fe73a63c884f06136f8baa7a5249cc8e9fdec677997dd78549f8128'}, - {'ml_dtypes-0.3.2_EigenAvx512.patch': '197b05b0b7f611749824369f026099f6a172f9e8eab6ebb6504a16573746c892'}, - ], - }), -] - -sanity_pip_check = True - -moduleclass = 'tools' From 8acb285a82174f1cbb65c8af8d65c898d7c1b2ec Mon Sep 17 00:00:00 2001 From: thoffman Date: Tue, 20 May 2025 14:06:23 +0200 Subject: [PATCH 28/32] switch to 0.4.35; read xla_gpu_cuda_data_dir from --- .../jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 216 ++++++++++++++++++ ...x-pybind11-systemlib_cupti_CUDA_HOME.patch | 177 ++++++++++++++ 2 files changed, 393 insertions(+) create mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb create mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti_CUDA_HOME.patch diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb new file mode 100644 index 00000000000..4b60a90867a --- /dev/null +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -0,0 +1,216 @@ +# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild +# Author: Denis Kristak +# Updated by: Alex Domingo (Vrije Universiteit Brussel) +# Updated by: Pavel Tománek (INUITS) +# Updated by: Thomas Hoffmann (EMBL Heidelberg) +easyblock = 'PythonBundle' + +name = 'jax' +version = '0.4.35' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://jax.readthedocs.io/' +description = """Composable 
transformations of Python+NumPy programs: +differentiate, vectorize, JIT to GPU/TPU, and more""" + +toolchain = {'name': 'gfbf', 'version': '2024a'} +cuda_compute_capabilities = ["5.0", "6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "9.0"] + +builddependencies = [ + # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: + # Error in fail: interpreter_path must be an absolute path + # Bazel 6.5.0 (download) works. + ('pybind11', '2.13.6'), # 2.12.0 ? SciPy-bundle has pybind/2.12.0. + ('pytest-xdist', '3.6.1'), + ('git', '2.45.1'), # bazel uses git to fetch repositories + ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py + ('poetry', '1.8.3'), + ('Clang', '18.1.8', versionsuffix) +] + +dependencies = [ + ('CUDA', '12.6.0', '', SYSTEM), # 12.6.2 ? + ('cuDNN', '9.5.0.50', versionsuffix, SYSTEM), + ('NCCL', '2.22.3', versionsuffix), + ('Python', '3.12.3'), + ('SciPy-bundle', '2024.05'), # 2024.11 ? + ('absl-py', '2.1.0'), + ('flatbuffers-python', '24.3.25'), + ('ml_dtypes', '0.5.0'), + ('zlib', '1.3.1'), +] + +# downloading xla and other tarballs to avoid that Bazel downloads it during the build +local_extract_cmd = 'mkdir -p %(builddir)s/archives && cp %s %(builddir)s/archives' +# note: following commits *must* be the exact same onces used upstream +# XLA_COMMIT from jax-jaxlib: third_party/xla/workspace.bzl +local_xla_commit = '76da730179313b3bebad6dea6861768421b7358c' +# TFRT_COMMIT from xla: third_party/tsl/third_party/tf_runtime/workspace.bzl +local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25' # TODO: still required? 
+# TODO: add other downloads + +# Use sources downloaded by EasyBuild +_jaxlib_buildopts = '--bazel_options="--distdir=%(builddir)s/archives" ' +# Use dependencies from EasyBuild +_jaxlib_buildopts += '--bazel_options="--action_env=TF_SYSTEM_LIBS=pybind11" ' +_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include:$EBROOTCUDA/extras/CUPTI/include" ' +# Avoid warning (treated as error) in upb/table.c +_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" ' # TODO: still required? +# _jaxlib_buildopts += '--nouse_clang ' #TODO: avoid clang (?) +_jaxlib_buildopts += '--cuda_version=%(cudaver)s ' +_jaxlib_buildopts += '--python_bin_path=$EBROOTPYTHON/bin/python3 ' +# Do not use hermetic CUDA/cuDNN/NCCL: (requires action_env=CPATH=$EBROOTCUDA/extras/CUPTI/include"; +# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include ): +_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDNN_PATH="$EBROOTCUDNN" """ +_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" """ +_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """ +_jaxlib_buildopts += """--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/CUPTI/include" """ + +_plugins_buildopts = """--enable_cuda """ +_plugins_buildopts += """--build_gpu_plugin """ +# _plugins_buildopts +="""--gpu_plugin_cuda_version=12 """ +_plugins_buildopts += """--build_gpu_pjrt_plugin """ +_plugins_buildopts += """--build_gpu_kernel_plugin=cuda """ + +# get rid of .devDate versionsuffix: TODO: find a better way +# _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """ does not work (?) +_no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """ +_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """ # required? 
+ +components = [ + ('jaxlib', version, { + 'sources': [ + { + 'source_urls': ['https://github.com/google/jax/archive/'], + 'filename': 'jax-v%(version)s.tar.gz', + }, + { + 'source_urls': ['https://github.com/openxla/xla/archive'], + 'download_filename': '%s.tar.gz' % local_xla_commit, + 'filename': 'xla-%s.tar.gz' % local_xla_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + { + 'source_urls': ['https://github.com/tensorflow/runtime/archive'], + 'download_filename': '%s.tar.gz' % local_tfrt_commit, + 'filename': 'tf_runtime-%s.tar.gz' % local_tfrt_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + ], + 'patches': [ + 'jax-0.4.35_easyblock_compat.patch', + 'jax-0.4.35_fix-pybind11-systemlib_cupti_CUDA_HOME.patch', + 'jax-0.4.35_version.patch', + ], + 'checksums': [ + {'jax-v0.4.35.tar.gz': + '65e086708ae56670676b7b2340ad82b901d8c9993d1241a839c8990bdb8d6212'}, + {'xla-76da7301.tar.gz': + 'd67ced09b69ab8d7b26fa4cd5f48b22db57eb330294a35f6e1d462ee17066757'}, + {'tf_runtime-0aeefb16.tar.gz': + 'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'}, + {'jax-0.4.35_easyblock_compat.patch': + 'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'}, + {'jax-0.4.35_fix-pybind11-systemlib_cupti_CUDA_HOME.patch': + 'fa5273d31651579590f7291fc151836f43024f74f4c89243dc4c6a417284e7ce'}, + {'jax-0.4.35_version.patch': + 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, + ], + 'start_dir': 'jax-jax-v%(version)s', + 'buildopts': _jaxlib_buildopts, + 'prebuildopts': ' mkdir third_party/gpus/cuda/extras/ -p && ' + + 'ln -s $EBROOTCUDA/extras/CUPTI third_party/gpus/cuda/extras --relative &&' + + _no_devtag + }), + # build jaxlib first and then plugins in 2nd interation: + ('jaxlib', version, { + 'sources': [ + { + 'source_urls': ['https://github.com/google/jax/archive/'], + 'filename': 'jax-v%(version)s.tar.gz', + }, + { + 'source_urls': ['https://github.com/openxla/xla/archive'], + 'download_filename': '%s.tar.gz' % 
local_xla_commit, + 'filename': 'xla-%s.tar.gz' % local_xla_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + { + 'source_urls': ['https://github.com/tensorflow/runtime/archive'], + 'download_filename': '%s.tar.gz' % local_tfrt_commit, + 'filename': 'tf_runtime-%s.tar.gz' % local_tfrt_commit[:8], + 'extract_cmd': local_extract_cmd, + }, + ], + 'checksums': [ + {'jax-v0.4.35.tar.gz': + '65e086708ae56670676b7b2340ad82b901d8c9993d1241a839c8990bdb8d6212'}, + {'xla-76da7301.tar.gz': + 'd67ced09b69ab8d7b26fa4cd5f48b22db57eb330294a35f6e1d462ee17066757'}, + {'tf_runtime-0aeefb16.tar.gz': + 'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'}, + ], + 'start_dir': 'jax-jax-v%(version)s', + 'buildopts': _jaxlib_buildopts + _plugins_buildopts, + 'prebuildopts': _no_devtag + }), +] +# failing: +# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 FAILED [ 98%] +# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 FAILED [ 98%] +# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 FAILED [ 99%] +# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 FAILED [ 99%] +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 - AssertionError: +# tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 FAILED [ 10%] +# FAILED tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 - AssertionError: +# + + +# Some tests require an isolated run: TODO: still required? 
+local_isolated_tests = [ + 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_scan_custom_jvp', + 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_transforms_doc', + 'tests/lax_scipy_special_functions_test.py::LaxScipySpcialFunctionsTest' + + '::testScipySpecialFun_gammainc_s_2x1x4_float32_float32', +] +# deliberately not testing in parallel, as that results in (additional) failing tests; +# use XLA_PYTHON_CLIENT_ALLOCATOR=platform to allocate and deallocate GPU memory during testing, +# see https://github.com/google/jax/issues/7323 and +# https://github.com/google/jax/blob/main/docs/gpu_memory_allocation.rst; +# use CUDA_VISIBLE_DEVICES=0 to avoid failing tests on systems with multiple GPUs; +# use NVIDIA_TF32_OVERRIDE=0 to avoid loosing numerical precision by disabling TF32 Tensor Cores; +local_test_exports = [ + "NVIDIA_TF32_OVERRIDE=0", + "CUDA_VISIBLE_DEVICES=0", + "XLA_PYTHON_CLIENT_ALLOCATOR=platform", + "JAX_ENABLE_X64=true", +] +local_test = ''.join(['export %s;' % x for x in local_test_exports]) +# run all tests at once except for local_isolated_tests: +local_test += "pytest -vv tests %s && " % ' '.join(['--deselect %s' % x for x in local_isolated_tests]) +# run remaining local_isolated_tests separately: +local_test += ' && '.join(['pytest -vv %s' % x for x in local_isolated_tests]) + + +exts_list = [ + (name, version, { + 'source_tmpl': '%(name)s-v%(version)s.tar.gz', + 'source_urls': ['https://github.com/google/jax/archive/'], + 'patches': ['jax-0.4.35_version.patch'], + 'checksums': [ + {'jax-v0.4.35.tar.gz': '65e086708ae56670676b7b2340ad82b901d8c9993d1241a839c8990bdb8d6212'}, + {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, + ], + 'runtest': False, # tmp + 'preinstallopts': _no_devtag + }), +] +sanity_check_commands = [ + """python -c "import jax_cuda"$(echo $EBVERSIONCUDA|awk -F '.' 
'{print $1}')"_plugin" """ +] + + +moduleclass = 'ai' diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti_CUDA_HOME.patch b/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti_CUDA_HOME.patch new file mode 100644 index 00000000000..9bfe292e33d --- /dev/null +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti_CUDA_HOME.patch @@ -0,0 +1,177 @@ +jax-0.4.25_fix-pybind11-systemlib.patch: Add missing value for System Pybind11 Bazel config +jax-0.4.25_fix-pybind11-systemlib.patch: Author: Alexander Grund (TU Dresden) + +THEMBL: fix cupti include path. xla-76da73_cuda_home.patch + +diff --git a/third_party/xla/fix-pybind11-systemlib.patch b/third_party/xla/fix-pybind11-systemlib.patch +new file mode 100644 +index 000000000..68bd2063d +--- /dev/null ++++ b/third_party/xla/fix-pybind11-systemlib.patch +@@ -0,0 +1,13 @@ ++--- xla-orig/third_party/tsl/third_party/systemlibs/pybind11.BUILD +++++ xla-4ccfe33c71665ddcbca5b127fefe8baa3ed632d4/third_party/tsl/third_party/systemlibs/pybind11.BUILD ++@@ -6,3 +6,10 @@ ++ "@tsl//third_party/python_runtime:headers", ++ ], ++ ) +++ +++# Needed by pybind11_bazel. 
+++config_setting( +++ name = "osx", +++ constraint_values = ["@platforms//os:osx"], +++) +++ +diff -ruN jax-jax-v0.4.35/jaxlib/gpu/vendor.h jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib_CUDA_HOME/jaxlib/gpu/vendor.h +--- jax-jax-v0.4.35/jaxlib/gpu/vendor.h 2024-10-22 21:00:23.000000000 +0200 ++++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib_CUDA_HOME/jaxlib/gpu/vendor.h 2024-11-26 10:56:20.396087442 +0100 +@@ -23,7 +23,7 @@ + #if defined(JAX_GPU_CUDA) + + // IWYU pragma: begin_exports +-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" ++#include <cupti.h> + #include "third_party/gpus/cuda/include/cooperative_groups.h" + #include "third_party/gpus/cuda/include/cuComplex.h" + #include "third_party/gpus/cuda/include/cublas_v2.h" +diff -ruN jax-jax-v0.4.35/third_party/xla/workspace.bzl jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib_CUDA_HOME/third_party/xla/workspace.bzl +--- jax-jax-v0.4.35/third_party/xla/workspace.bzl 2024-10-22 21:00:23.000000000 +0200 ++++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib_CUDA_HOME/third_party/xla/workspace.bzl 2025-05-13 17:10:47.808768995 +0200 +@@ -30,6 +30,12 @@ + sha256 = XLA_SHA256, + strip_prefix = "xla-{commit}".format(commit = XLA_COMMIT), + urls = tf_mirror_urls("https://github.com/openxla/xla/archive/{commit}.tar.gz".format(commit = XLA_COMMIT)), ++ patch_file = [ ++ "//third_party/xla:xla-76da73_cupti.patch", ++ "//third_party/xla:fix-pybind11-systemlib.patch", ++ "//third_party/xla:xla-76da73_cuda_home.patch", ++ ], ++ + ) + + # For development, one often wants to make changes to the TF repository as well +diff -ruN jax-jax-v0.4.35/third_party/xla/xla-76da73_cuda_home.patch jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib_CUDA_HOME/third_party/xla/xla-76da73_cuda_home.patch +--- jax-jax-v0.4.35/third_party/xla/xla-76da73_cuda_home.patch 1970-01-01 01:00:00.000000000 +0100 ++++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib_CUDA_HOME/third_party/xla/xla-76da73_cuda_home.patch 
2025-05-13 17:06:32.728189184 +0200 +@@ -0,0 +1,29 @@ ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/third_party/tsl/tsl/platform/default/cuda_root_path.cc xla-76da730179313b3bebad6dea6861768421b7358c_cuda_home/third_party/tsl/tsl/platform/default/cuda_root_path.cc ++--- xla-76da730179313b3bebad6dea6861768421b7358c/third_party/tsl/tsl/platform/default/cuda_root_path.cc 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cuda_home/third_party/tsl/tsl/platform/default/cuda_root_path.cc 2025-01-23 14:56:01.967843644 +0100 ++@@ -34,10 +34,15 @@ ++ #include "tsl/platform/env.h" ++ #endif ++ #include "tsl/platform/logging.h" ++- +++#include ++ namespace tsl { ++ ++ std::vector CandidateCudaRoots() { +++const char* env_p = std::getenv("CUDA_HOME"); +++if (!env_p) {return std::vector();} +++const std::string S=env_p; +++return std::vector(1, S); +++#if 0 ++ #if !defined(PLATFORM_GOOGLE) ++ auto roots = std::vector{}; ++ std::string runfiles_suffix = "runfiles"; ++@@ -83,6 +88,7 @@ ++ #else // !defined(PLATFORM_GOOGLE) ++ return {}; ++ #endif //! 
defined(PLATFORM_GOOGLE) +++#endif ++ } ++ ++ bool PreferPtxasFromPath() { return true; } ++Binary files xla-76da730179313b3bebad6dea6861768421b7358c/xla/service/gpu/.nvptx_compiler.cc.swp and xla-76da730179313b3bebad6dea6861768421b7358c_cuda_home/xla/service/gpu/.nvptx_compiler.cc.swp differ +diff -ruN jax-jax-v0.4.35/third_party/xla/xla-76da73_cupti.patch jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib_CUDA_HOME/third_party/xla/xla-76da73_cupti.patch +--- jax-jax-v0.4.35/third_party/xla/xla-76da73_cupti.patch 1970-01-01 01:00:00.000000000 +0100 ++++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib_CUDA_HOME/third_party/xla/xla-76da73_cupti.patch 2025-01-17 15:44:11.545694652 +0100 +@@ -0,0 +1,88 @@ ++Binary files xla-76da730179313b3bebad6dea6861768421b7358c/third_party/tsl/third_party/gpus/cuda/hermetic/.cuda_configure.bzl.swp and xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/third_party/tsl/third_party/gpus/cuda/hermetic/.cuda_configure.bzl.swp differ ++Binary files xla-76da730179313b3bebad6dea6861768421b7358c/third_party/tsl/third_party/gpus/.find_cuda_config.py.swp and xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/third_party/tsl/third_party/gpus/.find_cuda_config.py.swp differ ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_collector.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_collector.cc ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_collector.cc 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_collector.cc 2025-01-17 10:22:48.947856740 +0100 ++@@ -24,8 +24,8 @@ ++ #include "absl/hash/hash.h" ++ #include "absl/strings/str_cat.h" ++ #include "absl/strings/str_join.h" ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti_activity.h" +++#include 
+++#include ++ #include "third_party/gpus/cuda/include/cuda.h" ++ #include "third_party/gpus/cuda/include/cuda_occupancy.h" ++ #include "xla/tsl/profiler/utils/parse_annotation.h" ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_interface.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_interface.h ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_interface.h 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_interface.h 2025-01-17 10:22:48.947856740 +0100 ++@@ -19,7 +19,7 @@ ++ #include ++ #include ++ ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include ++ #include "third_party/gpus/cuda/include/cuda.h" ++ #include "tsl/platform/macros.h" ++ #include "tsl/platform/types.h" ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_profiler.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_profiler.h ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_profiler.h 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_profiler.h 2025-01-17 10:22:48.947856740 +0100 ++@@ -18,7 +18,7 @@ ++ #include ++ #include ++ ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include ++ #include "xla/backends/profiler/gpu/cupti_interface.h" ++ #include "tsl/platform/types.h" ++ ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.cc ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.cc 2024-10-21 20:29:31.000000000 +0200 +++++ 
xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.cc 2025-01-17 14:50:00.284134999 +0100 ++@@ -24,7 +24,7 @@ ++ #include "absl/cleanup/cleanup.h" ++ #include "absl/container/flat_hash_set.h" ++ #include "absl/types/span.h" ++-#include "third_party/gpus/cuda/extras/CUPTI/include/generated_nvtx_meta.h" +++#include ++ #include "third_party/gpus/cuda/include/cuda.h" ++ #include "xla/backends/profiler/gpu/cupti_buffer_events.h" ++ #include "xla/backends/profiler/gpu/cupti_collector.h" ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.h ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.h 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.h 2025-01-17 10:22:48.948856765 +0100 ++@@ -22,7 +22,7 @@ ++ ++ #include "absl/status/status.h" ++ #include "absl/types/optional.h" ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include ++ #include "third_party/gpus/cuda/include/nvtx3/nvToolsExt.h" ++ #include "xla/backends/profiler/gpu/cupti_collector.h" ++ #include "xla/backends/profiler/gpu/cupti_interface.h" ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_wrapper.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_wrapper.h ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_wrapper.h 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_wrapper.h 2025-01-17 10:22:48.948856765 +0100 ++@@ -19,7 +19,7 @@ ++ #include ++ #include ++ ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include ++ #include "third_party/gpus/cuda/include/cuda.h" 
++ #include "xla/backends/profiler/gpu/cupti_interface.h" ++ ++diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/tsl/cuda/cupti_stub.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/tsl/cuda/cupti_stub.cc ++--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/tsl/cuda/cupti_stub.cc 2024-10-21 20:29:31.000000000 +0200 +++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/tsl/cuda/cupti_stub.cc 2025-01-17 10:22:48.948856765 +0100 ++@@ -13,7 +13,7 @@ ++ limitations under the License. ++ ==============================================================================*/ ++ ++-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" +++#include <cupti.h> ++ #include "third_party/gpus/cuda/include/cuda.h" ++ #include "tsl/platform/dso_loader.h" ++ #include "tsl/platform/load_library.h" From 2ac7d2e1cf0d1d364732ea621437eb3caaf4e374 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Tue, 20 May 2025 14:07:08 +0200 Subject: [PATCH 29/32] Delete easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb --- .../jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb | 229 ------------------ 1 file changed, 229 deletions(-) delete mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb deleted file mode 100644 index ba56113debb..00000000000 --- a/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb +++ /dev/null @@ -1,229 +0,0 @@ -# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild -# Author: Denis Kristak -# Updated by: Alex Domingo (Vrije Universiteit Brussel) -# Updated by: Pavel Tománek (INUITS) -# Updated by: Thomas Hoffmann (EMBL Heidelberg) -easyblock = 'PythonBundle' - -name = 'jax' -version = '0.4.34' -versionsuffix = '-CUDA-%(cudaver)s' - -homepage = 'https://jax.readthedocs.io/' 
-description = """Composable transformations of Python+NumPy programs: -differentiate, vectorize, JIT to GPU/TPU, and more""" - -toolchain = {'name': 'gfbf', 'version': '2024a'} -cuda_compute_capabilities = ["5.0", "6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "9.0"] - -builddependencies = [ - # ('Bazel', '7.4.1'), TODO: problems with @@local_config_python//:py3_runtime: - # Error in fail: interpreter_path must be an absolute path - # Bazel 6.5.0 (download) works. - ('pybind11', '2.13.6'), - ('pytest-xdist', '3.6.1'), - ('git', '2.45.1'), # bazel uses git to fetch repositories - ('matplotlib', '3.9.2'), # required for tests/lobpcg_test.py - ('poetry', '1.8.3'), - ('Clang', '18.1.8', versionsuffix) -] - -dependencies = [ - ('CUDA', '12.6.0', '', SYSTEM), # 12.6.2 ? - ('cuDNN', '9.5.0.50', versionsuffix, SYSTEM), - ('NCCL', '2.22.3', versionsuffix), - ('Python', '3.12.3'), - ('SciPy-bundle', '2024.05'), # 2024.11 ? - ('absl-py', '2.1.0'), - ('flatbuffers-python', '24.3.25'), - ('ml_dtypes', '0.5.0'), - ('zlib', '1.3.1'), -] - -# downloading xla and other tarballs to avoid that Bazel downloads it during the build -local_extract_cmd = 'mkdir -p %(builddir)s/archives && cp %s %(builddir)s/archives' -# note: following commits *must* be the exact same onces used upstream -# XLA_COMMIT from jax-jaxlib: third_party/xla/workspace.bzl -local_xla_commit = 'cd6e808c59f53b40a99df1f1b860db9a3e598bff' -# TFRT_COMMIT from xla: third_party/tsl/third_party/tf_runtime/workspace.bzl -local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25' # TODO: still required? 
-# TODO: add other downloads - -# Use sources downloaded by EasyBuild -_jaxlib_buildopts = '--bazel_options="--distdir=%(builddir)s/archives" ' -# Use dependencies from EasyBuild -_jaxlib_buildopts += '--bazel_options="--action_env=TF_SYSTEM_LIBS=pybind11" ' -_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include:$EBROOTCUDA/extras/CUPTI/include" ' -# Avoid warning (treated as error) in upb/table.c -_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" ' # TODO: still required? -# _jaxlib_buildopts += '--nouse_clang ' #TODO: avoid clang (?) -_jaxlib_buildopts += '--cuda_version=%(cudaver)s ' -_jaxlib_buildopts += '--python_bin_path=$EBROOTPYTHON/bin/python3 ' -# Do not use hermetic CUDA/cuDNN/NCCL: (requires action_env=CPATH=$EBROOTCUDA/extras/CUPTI/include"; -# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include ): -_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDNN_PATH="$EBROOTCUDNN" """ -_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" """ -_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """ -_jaxlib_buildopts += """--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/CUPTI/include" """ - -_plugins_buildopts = """--enable_cuda """ -_plugins_buildopts += """--build_gpu_plugin """ -# _plugins_buildopts +="""--gpu_plugin_cuda_version=12 """ -_plugins_buildopts += """--build_gpu_pjrt_plugin """ -_plugins_buildopts += """--build_gpu_kernel_plugin=cuda """ - -# get rid of .devDate versionsuffix: TODO: find a better way -# _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """ does not work (?) -_no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """ -_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """ # required? 
- -components = [ - ('jaxlib', version, { - 'sources': [ - { - 'source_urls': ['https://github.com/google/jax/archive/'], - 'filename': 'jax-v%(version)s.tar.gz', - }, - { - 'source_urls': ['https://github.com/openxla/xla/archive'], - 'download_filename': '%s.tar.gz' % local_xla_commit, - 'filename': 'xla-%s.tar.gz' % local_xla_commit[:8], - 'extract_cmd': local_extract_cmd, - }, - { - 'source_urls': ['https://github.com/tensorflow/runtime/archive'], - 'download_filename': '%s.tar.gz' % local_tfrt_commit, - 'filename': 'tf_runtime-%s.tar.gz' % local_tfrt_commit[:8], - 'extract_cmd': local_extract_cmd, - }, - ], - 'patches': [ - 'jax-0.4.35_easyblock_compat.patch', - 'jax-0.4.35_fix-pybind11-systemlib_cupti.patch', - 'jax-0.4.35_version.patch', - ], - 'checksums': [ - {'jax-v0.4.34.tar.gz': - 'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'}, - {'xla-cd6e808c.tar.gz': - '65cb6d63ef4083b35775052636cb9c629f86db6947c8b91711923ba31dbdcde8'}, - {'tf_runtime-0aeefb16.tar.gz': - 'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'}, - {'jax-0.4.35_easyblock_compat.patch': - 'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'}, - {'jax-0.4.35_fix-pybind11-systemlib_cupti.patch': - '51369589193be60dc94ec2de1b35d0a9268288578903fb05d41b6d1a8c9df460'}, - {'jax-0.4.35_version.patch': - 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, - ], - 'start_dir': 'jax-jax-v%(version)s', - 'buildopts': _jaxlib_buildopts, - 'prebuildopts': ' mkdir third_party/gpus/cuda/extras/ -p && ' + - 'ln -s $EBROOTCUDA/extras/CUPTI third_party/gpus/cuda/extras --relative &&' + - _no_devtag - }), - # build jaxlib first and then plugins in 2nd interation: - ('jaxlib', version, { - 'sources': [ - { - 'source_urls': ['https://github.com/google/jax/archive/'], - 'filename': 'jax-v%(version)s.tar.gz', - }, - { - 'source_urls': ['https://github.com/openxla/xla/archive'], - 'download_filename': '%s.tar.gz' % local_xla_commit, - 'filename': 
'xla-%s.tar.gz' % local_xla_commit[:8], - 'extract_cmd': local_extract_cmd, - }, - { - 'source_urls': ['https://github.com/tensorflow/runtime/archive'], - 'download_filename': '%s.tar.gz' % local_tfrt_commit, - 'filename': 'tf_runtime-%s.tar.gz' % local_tfrt_commit[:8], - 'extract_cmd': local_extract_cmd, - }, - ], - 'checksums': [ - {'jax-v0.4.34.tar.gz': - 'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'}, - {'xla-cd6e808c.tar.gz': - '65cb6d63ef4083b35775052636cb9c629f86db6947c8b91711923ba31dbdcde8'}, - {'tf_runtime-0aeefb16.tar.gz': - 'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'}, - ], - 'start_dir': 'jax-jax-v%(version)s', - 'buildopts': _jaxlib_buildopts + _plugins_buildopts, - 'prebuildopts': _no_devtag - }), - -] -# failing: -# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 FAILED [ 98%] -# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 FAILED [ 98%] -# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 FAILED [ 99%] -# tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 FAILED [ 99%] -# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 - AssertionError: -# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 - AssertionError: -# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 - AssertionError: -# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 - AssertionError: -# tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 FAILED [ 10%] -# FAILED tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 - AssertionError: -# - -# Some tests require an isolated run: TODO: still required? 
-local_isolated_tests = [ - 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_scan_custom_jvp', - 'tests/host_callback_test.py::HostCallbackTapTest::test_tap_transforms_doc', - 'tests/lax_scipy_special_functions_test.py::LaxScipySpcialFunctionsTest' + - '::testScipySpecialFun_gammainc_s_2x1x4_float32_float32', -] -# deliberately not testing in parallel, as that results in (additional) failing tests; -# use XLA_PYTHON_CLIENT_ALLOCATOR=platform to allocate and deallocate GPU memory during testing, -# see https://github.com/google/jax/issues/7323 and -# https://github.com/google/jax/blob/main/docs/gpu_memory_allocation.rst; -# use CUDA_VISIBLE_DEVICES=0 to avoid failing tests on systems with multiple GPUs; -# use NVIDIA_TF32_OVERRIDE=0 to avoid loosing numerical precision by disabling TF32 Tensor Cores; -local_test_exports = [ - "NVIDIA_TF32_OVERRIDE=0", - "CUDA_VISIBLE_DEVICES=0", - "XLA_PYTHON_CLIENT_ALLOCATOR=platform", - "JAX_ENABLE_X64=true", -] -local_test = ''.join(['export %s;' % x for x in local_test_exports]) -# run all tests at once except for local_isolated_tests: -local_test += "pytest -vv tests %s && " % ' '.join(['--deselect %s' % x for x in local_isolated_tests]) -# run remaining local_isolated_tests separately: -local_test += ' && '.join(['pytest -vv %s' % x for x in local_isolated_tests]) - -use_pip = True - -exts_list = [ - (name, version, { - 'patches': ['jax-0.4.35_version.patch'], - 'preinstallopts': _no_devtag, - 'runtest': False, - 'source_tmpl': '%(name)s-v%(version)s.tar.gz', - 'source_urls': ['https://github.com/google/jax/archive/'], - 'checksums': [ - {'jax-v0.4.34.tar.gz': 'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'}, - {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'}, - ], - }), -] -sanity_check_commands = [ - """python -c "import jax_cuda"$(echo $EBVERSIONCUDA|awk -F '.' 
'{print $1}')"_plugin" """ -] -sanity_pip_check = True - -# TODO: patch to set default XLA_FLAGS -modluafooter = """ -setenv("XLA_FLAGS", "--xla_gpu_cuda_data_dir=" .. os.getenv("CUDA_HOME")); -""" - -modtclfooter = """ -setenv XLA_FLAGS --xla_gpu_cuda_data_dir=$::env(CUDA_HOME) -""" - -# TODO: sanity check paths - - -moduleclass = 'ai' From 65356d7bbcec0d5354bdc7f39c26f983f8ec4f5a Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Tue, 20 May 2025 14:07:48 +0200 Subject: [PATCH 30/32] Delete easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch --- ...-0.4.35_fix-pybind11-systemlib_cupti.patch | 143 ------------------ 1 file changed, 143 deletions(-) delete mode 100644 easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch b/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch deleted file mode 100644 index 5da275fb960..00000000000 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35_fix-pybind11-systemlib_cupti.patch +++ /dev/null @@ -1,143 +0,0 @@ -jax-0.4.25_fix-pybind11-systemlib.patch: Add missing value for System Pybind11 Bazel config -jax-0.4.25_fix-pybind11-systemlib.patch: Author: Alexander Grund (TU Dresden) - -THEMBL: fix cupti include path. - -diff --git a/third_party/xla/fix-pybind11-systemlib.patch b/third_party/xla/fix-pybind11-systemlib.patch -new file mode 100644 -index 000000000..68bd2063d ---- /dev/null -+++ b/third_party/xla/fix-pybind11-systemlib.patch -@@ -0,0 +1,13 @@ -+--- xla-orig/third_party/tsl/third_party/systemlibs/pybind11.BUILD -++++ xla-4ccfe33c71665ddcbca5b127fefe8baa3ed632d4/third_party/tsl/third_party/systemlibs/pybind11.BUILD -+@@ -6,3 +6,10 @@ -+ "@tsl//third_party/python_runtime:headers", -+ ], -+ ) -++ -++# Needed by pybind11_bazel. 
-++config_setting( -++ name = "osx", -++ constraint_values = ["@platforms//os:osx"], -++) -++ -diff -ruN jax-jax-v0.4.35/jaxlib/gpu/vendor.h jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/jaxlib/gpu/vendor.h ---- jax-jax-v0.4.35/jaxlib/gpu/vendor.h 2024-10-22 21:00:23.000000000 +0200 -+++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/jaxlib/gpu/vendor.h 2024-11-26 10:56:20.396087442 +0100 -@@ -23,7 +23,7 @@ - #if defined(JAX_GPU_CUDA) - - // IWYU pragma: begin_exports --#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" -+#include - #include "third_party/gpus/cuda/include/cooperative_groups.h" - #include "third_party/gpus/cuda/include/cuComplex.h" - #include "third_party/gpus/cuda/include/cublas_v2.h" -diff -ruN jax-jax-v0.4.35/third_party/xla/workspace.bzl jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/workspace.bzl ---- jax-jax-v0.4.35/third_party/xla/workspace.bzl 2024-10-22 21:00:23.000000000 +0200 -+++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/workspace.bzl 2024-11-27 12:17:37.913466273 +0100 -@@ -30,6 +30,11 @@ - sha256 = XLA_SHA256, - strip_prefix = "xla-{commit}".format(commit = XLA_COMMIT), - urls = tf_mirror_urls("https://github.com/openxla/xla/archive/{commit}.tar.gz".format(commit = XLA_COMMIT)), -+ patch_file = [ -+ "//third_party/xla:xla-76da73_cupti.patch", -+ "//third_party/xla:fix-pybind11-systemlib.patch", -+ ], -+ - ) - - # For development, one often wants to make changes to the TF repository as well -diff -ruN jax-jax-v0.4.35/third_party/xla/xla-76da73_cupti.patch jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/xla-76da73_cupti.patch ---- jax-jax-v0.4.35/third_party/xla/xla-76da73_cupti.patch 1970-01-01 01:00:00.000000000 +0100 -+++ jax-jax-v0.4.35_jaxlib_cupti__fix-pybind11-systemlib/third_party/xla/xla-76da73_cupti.patch 2025-01-17 15:44:11.545694652 +0100 -@@ -0,0 +1,88 @@ -+Binary files 
xla-76da730179313b3bebad6dea6861768421b7358c/third_party/tsl/third_party/gpus/cuda/hermetic/.cuda_configure.bzl.swp and xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/third_party/tsl/third_party/gpus/cuda/hermetic/.cuda_configure.bzl.swp differ -+Binary files xla-76da730179313b3bebad6dea6861768421b7358c/third_party/tsl/third_party/gpus/.find_cuda_config.py.swp and xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/third_party/tsl/third_party/gpus/.find_cuda_config.py.swp differ -+diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_collector.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_collector.cc -+--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_collector.cc 2024-10-21 20:29:31.000000000 +0200 -++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_collector.cc 2025-01-17 10:22:48.947856740 +0100 -+@@ -24,8 +24,8 @@ -+ #include "absl/hash/hash.h" -+ #include "absl/strings/str_cat.h" -+ #include "absl/strings/str_join.h" -+-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" -+-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti_activity.h" -++#include -++#include -+ #include "third_party/gpus/cuda/include/cuda.h" -+ #include "third_party/gpus/cuda/include/cuda_occupancy.h" -+ #include "xla/tsl/profiler/utils/parse_annotation.h" -+diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_interface.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_interface.h -+--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_interface.h 2024-10-21 20:29:31.000000000 +0200 -++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_interface.h 2025-01-17 10:22:48.947856740 +0100 -+@@ -19,7 +19,7 @@ -+ #include -+ #include -+ -+-#include 
"third_party/gpus/cuda/extras/CUPTI/include/cupti.h" -++#include -+ #include "third_party/gpus/cuda/include/cuda.h" -+ #include "tsl/platform/macros.h" -+ #include "tsl/platform/types.h" -+diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_profiler.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_profiler.h -+--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_profiler.h 2024-10-21 20:29:31.000000000 +0200 -++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_profiler.h 2025-01-17 10:22:48.947856740 +0100 -+@@ -18,7 +18,7 @@ -+ #include -+ #include -+ -+-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" -++#include -+ #include "xla/backends/profiler/gpu/cupti_interface.h" -+ #include "tsl/platform/types.h" -+ -+diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.cc -+--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.cc 2024-10-21 20:29:31.000000000 +0200 -++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.cc 2025-01-17 14:50:00.284134999 +0100 -+@@ -24,7 +24,7 @@ -+ #include "absl/cleanup/cleanup.h" -+ #include "absl/container/flat_hash_set.h" -+ #include "absl/types/span.h" -+-#include "third_party/gpus/cuda/extras/CUPTI/include/generated_nvtx_meta.h" -++#include -+ #include "third_party/gpus/cuda/include/cuda.h" -+ #include "xla/backends/profiler/gpu/cupti_buffer_events.h" -+ #include "xla/backends/profiler/gpu/cupti_collector.h" -+diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.h -+--- 
xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_tracer.h 2024-10-21 20:29:31.000000000 +0200 -++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_tracer.h 2025-01-17 10:22:48.948856765 +0100 -+@@ -22,7 +22,7 @@ -+ -+ #include "absl/status/status.h" -+ #include "absl/types/optional.h" -+-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" -++#include -+ #include "third_party/gpus/cuda/include/nvtx3/nvToolsExt.h" -+ #include "xla/backends/profiler/gpu/cupti_collector.h" -+ #include "xla/backends/profiler/gpu/cupti_interface.h" -+diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_wrapper.h xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_wrapper.h -+--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/backends/profiler/gpu/cupti_wrapper.h 2024-10-21 20:29:31.000000000 +0200 -++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/backends/profiler/gpu/cupti_wrapper.h 2025-01-17 10:22:48.948856765 +0100 -+@@ -19,7 +19,7 @@ -+ #include -+ #include -+ -+-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" -++#include -+ #include "third_party/gpus/cuda/include/cuda.h" -+ #include "xla/backends/profiler/gpu/cupti_interface.h" -+ -+diff -ruN xla-76da730179313b3bebad6dea6861768421b7358c/xla/tsl/cuda/cupti_stub.cc xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/tsl/cuda/cupti_stub.cc -+--- xla-76da730179313b3bebad6dea6861768421b7358c/xla/tsl/cuda/cupti_stub.cc 2024-10-21 20:29:31.000000000 +0200 -++++ xla-76da730179313b3bebad6dea6861768421b7358c_cupti_include/xla/tsl/cuda/cupti_stub.cc 2025-01-17 10:22:48.948856765 +0100 -+@@ -13,7 +13,7 @@ -+ limitations under the License. 
-+ ==============================================================================*/ -+ -+-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" -++#include -+ #include "third_party/gpus/cuda/include/cuda.h" -+ #include "tsl/platform/dso_loader.h" -+ #include "tsl/platform/load_library.h" From a2ec79c2027bcc2f24c6ca04217563634df0b0b7 Mon Sep 17 00:00:00 2001 From: thoffman Date: Tue, 20 May 2025 14:14:50 +0200 Subject: [PATCH 31/32] whitespaces --- .../j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb index 4b60a90867a..0a2d35da664 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -60,7 +60,7 @@ _jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" ' # TOD _jaxlib_buildopts += '--cuda_version=%(cudaver)s ' _jaxlib_buildopts += '--python_bin_path=$EBROOTPYTHON/bin/python3 ' # Do not use hermetic CUDA/cuDNN/NCCL: (requires action_env=CPATH=$EBROOTCUDA/extras/CUPTI/include"; -# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include ): +# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include ): _jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDNN_PATH="$EBROOTCUDNN" """ _jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" """ _jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """ @@ -160,13 +160,13 @@ components = [ # tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 FAILED [ 98%] # tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 FAILED [ 99%] # tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 FAILED [ 
99%] -# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 - AssertionError: -# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 - AssertionError: -# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 - AssertionError: -# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex128 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_expm1_complex64 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 - AssertionError: +# FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 - AssertionError: # tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 FAILED [ 10%] # FAILED tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 - AssertionError: -# +# # Some tests require an isolated run: TODO: still required? 
@@ -191,7 +191,7 @@ local_test_exports = [ local_test = ''.join(['export %s;' % x for x in local_test_exports]) # run all tests at once except for local_isolated_tests: local_test += "pytest -vv tests %s && " % ' '.join(['--deselect %s' % x for x in local_isolated_tests]) -# run remaining local_isolated_tests separately: +# run remaining local_isolated_tests separately: local_test += ' && '.join(['pytest -vv %s' % x for x in local_isolated_tests]) From 2172e30bf387e9a82b4a6e1e21fb951f77046b1f Mon Sep 17 00:00:00 2001 From: thoffman Date: Tue, 20 May 2025 14:38:57 +0200 Subject: [PATCH 32/32] whitepace --- .../easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb index 0a2d35da664..5a431a68079 100644 --- a/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb +++ b/easybuild/easyconfigs/j/jax/jax-0.4.35-gfbf-2024a-CUDA-12.6.0.eb @@ -165,7 +165,7 @@ components = [ # FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex128 - AssertionError: # FAILED tests/lax_test.py::FunctionAccuracyTest::testSuccessOnComplexPlane_tan_complex64 - AssertionError: # tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 FAILED [ 10%] -# FAILED tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 - AssertionError: +# FAILED tests/nn_test.py::NNFunctionsTest::testDotProductAttentionMask7 - AssertionError: #