Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
easyblock = 'PythonBundle'

name = 'Horovod'
version = '0.26.1'
local_pt_version = '1.11.0'
local_cuda_suffix = '-CUDA-%(cudaver)s'
versionsuffix = local_cuda_suffix + '-PyTorch-%s' % local_pt_version

homepage = 'https://github.com/uber/horovod'
description = "Horovod is a distributed training framework for TensorFlow."

toolchain = {'name': 'foss', 'version': '2021b'}

builddependencies = [
('CMake', '3.22.1'),
('flatbuffers', '2.0.0'),
]
dependencies = [
('Python', '3.9.6'),
('PyYAML', '5.4.1'),
('CUDA', '11.4.1', '', SYSTEM),
('NCCL', '2.10.3', local_cuda_suffix),
('PyTorch', local_pt_version, local_cuda_suffix),
]

use_pip = True
sanity_pip_check = True

preinstallopts = 'HOROVOD_WITH_MPI=1 HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL '
preinstallopts += 'HOROVOD_WITHOUT_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 '

parallel = 1 # Bug in CMake causes a race condition on horovod_cuda_kernels_generated_cuda_kernels.cu.o.NVCC-depend

exts_list = [
('cloudpickle', '2.2.0', {
'checksums': ['3f4219469c55453cfe4737e564b67c2a149109dabf7f242478948b895f61106f'],
}),
('horovod', version, {
'checksums': ['0079e4efb4ffe3f042e9e00470fbb457973903e748f75909de71e492d2474e94'],
}),
]

sanity_check_paths = {
'files': ['bin/horovodrun'],
'dirs': ['lib/python%(pyshortver)s/site-packages'],
}

moduleclass = 'tools'
36 changes: 36 additions & 0 deletions easybuild/easyconfigs/p/Pillow/Pillow-8.2.0-GCCcore-11.2.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
easyblock = 'PythonPackage'

name = 'Pillow'
version = '8.2.0'

homepage = 'https://pillow.readthedocs.org/'
description = """Pillow is the 'friendly PIL fork' by Alex Clark and Contributors.
PIL is the Python Imaging Library by Fredrik Lundh and Contributors."""

toolchain = {'name': 'GCCcore', 'version': '11.2.0'}

sources = [SOURCE_TAR_GZ]
patches = ['Pillow-%(version_major)s_CVE-2021-23437.patch']
checksums = [
'a787ab10d7bb5494e5f76536ac460741788f1fbce851068d73a87ca7c35fc3e1', # Pillow-8.2.0.tar.gz
'12bac0ff4f815d1e60ed129106c2990c8c6157891fd90ec8aaec88a5a642d521', # Pillow-8_CVE-2021-23437.patch
]

builddependencies = [('binutils', '2.37')]

dependencies = [
('Python', '3.9.6'),
('libjpeg-turbo', '2.0.6'),
('libpng', '1.6.37'),
('zlib', '1.2.11'),
('LibTIFF', '4.3.0'),
('freetype', '2.11.0')
]

download_dep_fail = True
use_pip = True
sanity_pip_check = True

options = {'modulename': 'PIL'}

moduleclass = 'vis'
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
name = 'PyTorch'
version = '1.11.0'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2021b'}

sources = [{
'filename': '%(name)s-%(version)s.tar.gz',
'git_config': {
'url': 'https://github.com/pytorch',
'repo_name': 'pytorch',
'tag': 'v%(version)s',
'recursive': True,
},
}]
patches = [
'PyTorch-1.7.0_avoid-nan-in-test-torch.patch',
'PyTorch-1.7.0_disable-dev-shm-test.patch',
'PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch',
'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch',
'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch',
'PyTorch-1.10.0_skip_cmake_rpath.patch',
'PyTorch-1.11.0_increase-distributed-test-timeout.patch',
'PyTorch-1.11.0_skip_failing_ops_tests.patch',
'PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch',
'PyTorch-1.11.0_fix_sharded_imports.patch',
'PyTorch-1.11.0_increase_test_tolerances_TF32.patch',
'PyTorch-1.11.0_increase_c10d_gloo_timeout.patch',
'PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch',
]
checksums = [
None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone'
'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch
'622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch
'89ac7a8e9e7df2e64cf8404fe3a279f5e9b759fee41c9de3aaff9c22f385c2c6', # PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch
# PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch
'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea',
# PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch
'313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707',
'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', # PyTorch-1.10.0_skip_cmake_rpath.patch
# PyTorch-1.11.0_increase-distributed-test-timeout.patch
'087ad20163a1291773ae3457569b80523080eb3731e210946459b2333a919f3f',
'8eaca92d64fcadb0552d28e9c7ea5c4bc669d2fe33004e45a3519ce8d0d136a2', # PyTorch-1.11.0_skip_failing_ops_tests.patch
'21fc678febcdfbb9dabd72235be23cd392044e9a954f6580d15b530e1f69dcc1', # PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch
'9a04f4285b800dad8a00c3014af0a9713d40d5dd35d10931c7c0da4e89c558e9', # PyTorch-1.11.0_fix_sharded_imports.patch
# PyTorch-1.11.0_increase_test_tolerances_TF32.patch
'26e179a4f6f57e49209092612ae5f5cd8c03fd2ca84566ba0244eabefc3736ba',
# PyTorch-1.11.0_increase_c10d_gloo_timeout.patch
'20cd4a8663f74ab326fdb032b926bf5c7e94d9750c515ab9050927ba00cf1953',
# PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch
'e7bfe120a8b3fe2b40dac6839852a5fbab3cb3429fbe44a0fc3a1800adaaee51',
]

osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
('CMake', '3.22.1'),
('hypothesis', '6.14.6'),
]

dependencies = [
('CUDA', '11.4.1', '', SYSTEM),
('Ninja', '1.10.2'), # Required for JIT compilation of C++ extensions
('Python', '3.9.6'),
('protobuf', '3.17.3'),
('protobuf-python', '3.17.3'),
('pybind11', '2.7.1'),
('SciPy-bundle', '2021.10'),
('typing-extensions', '4.3.0'),
('PyYAML', '5.4.1'),
('MPFR', '4.1.0'),
('GMP', '6.2.1'),
('numactl', '2.0.14'),
('FFmpeg', '4.3.2'),
('Pillow', '8.2.0'),
('cuDNN', '8.2.2.26', '-CUDA-%(cudaver)s', SYSTEM),
('magma', '2.6.2', '-CUDA-%(cudaver)s'),
('NCCL', '2.10.3', '-CUDA-%(cudaver)s'),
('expecttest', '0.1.3'),
]

# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
cuda_compute_capabilities = ['3.5', '3.7', '5.2', '6.0', '6.1', '7.0', '7.2', '7.5', '8.0', '8.6']

custom_opts = ["USE_CUPTI_SO=1"]

excluded_tests = {
'': [
# Bad tests: https://github.com/pytorch/pytorch/issues/60260
'distributed/elastic/utils/distributed_test',
'distributed/elastic/multiprocessing/api_test',
# These tests fail on A10s at the very least, they time out forever no matter how long the timeout is.
# Possibly related to NCCL 2.8.3: https://docs.nvidia.com/deeplearning/nccl/release-notes/rel_2-8-3.html
# 'distributed/test_distributed_fork',
'distributed/test_distributed_spawn',
# Fails on A10s: https://github.com/pytorch/pytorch/issues/63079
'test_optim',
# Test from this suite timeout often. The process group backend is deprecated anyway
# 'distributed/rpc/test_process_group_agent',
# This test fails constently when run as part of the test suite, but succeeds when run interactively
'test_model_dump',
# These tests appear flaky, possibly related to number of GPUs that are used
'distributed/fsdp/test_fsdp_memory',
'distributed/fsdp/test_fsdp_overlap',
]
}

runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'

# several tests are known to be flaky, and fail in some contexts (like having multiple GPUs available),
# so we allow up to 10 (out of ~90k) tests to fail before treating the installation to be faulty
max_failed_tests = 10

# The readelf sanity check command can be taken out once the TestRPATH test from
# https://github.com/pytorch/pytorch/pull/68912 is accepted, since it is then checked as part of the PyTorch test suite
local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT
sanity_check_commands = [
"readelf -d %s | egrep 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2,
]
tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'devel'
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
easyblock = 'PythonPackage'

name = 'typing-extensions'
version = '4.3.0'

homepage = 'https://github.com/python/typing/blob/master/typing_extensions/README.rst'
description = 'Typing Extensions – Backported and Experimental Type Hints for Python'

toolchain = {'name': 'GCCcore', 'version': '11.2.0'}

sources = ['typing_extensions-%(version)s.tar.gz']
checksums = ['e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6']

builddependencies = [('binutils', '2.37')]

dependencies = [('Python', '3.9.6')]

use_pip = True
download_dep_fail = True
sanity_pip_check = True

moduleclass = 'devel'