diff --git a/easybuild/easyconfigs/c/CuPy/CuPy-13.0.0-foss-2023a-CUDA-12.1.1.eb b/easybuild/easyconfigs/c/CuPy/CuPy-13.0.0-foss-2023a-CUDA-12.1.1.eb new file mode 100644 index 000000000000..4d1e02157e5b --- /dev/null +++ b/easybuild/easyconfigs/c/CuPy/CuPy-13.0.0-foss-2023a-CUDA-12.1.1.eb @@ -0,0 +1,85 @@ +easyblock = 'PythonBundle' + +name = 'CuPy' +version = '13.0.0' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://cupy.dev' +description = "CuPy is an open-source array library accelerated with NVIDIA CUDA." + +toolchain = {'name': 'foss', 'version': '2023a'} + +builddependencies = [ + ('hypothesis', '6.82.0'), +] + +dependencies = [ + ('Python', '3.11.3'), + ('SciPy-bundle', '2023.07'), + ('CUDA', '12.1.1', '', SYSTEM), + ('cuDNN', '8.9.2.26', versionsuffix, SYSTEM), + ('NCCL', '2.18.3', versionsuffix), + ('cuTENSOR', '2.0.1.2', versionsuffix, SYSTEM), + ('cuSPARSELt', '0.6.0.6', versionsuffix, SYSTEM), +] + +use_pip = True + +exts_default_options = {'source_urls': [PYPI_LOWER_SOURCE]} + +_skip_tests = [ + '--ignore tests/example_tests', # examples are not included + '--ignore tests/cupy_tests/fft_tests/test_fft.py', # CUFFT_INTERNAL_ERROR + # Sorting broken on at least T4 (this is troubling): + '--deselect tests/cupy_tests/' + 'sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_one_dim', + '--deselect tests/cupy_tests/' + 'sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_one_dim', + # https://github.com/cupy/cupy/issues/8255: + '--deselect tests/cupyx_tests/' + 'scipy_tests/signal_tests/test_filter_design.py::TestSOSFreqz::test_sos_freqz_against_mp', + # Floating point precision issues: + '--deselect tests/cupy_tests/core_tests/fusion_tests/test_reduction.py::TestFusion', + '--deselect tests/cupyx_tests/' + 'scipy_tests/signal_tests/test_filter_design.py::TestSOSFreqz::test_sosfrez_design_cheb2_2', + '--deselect 
tests/cupyx_tests/scipy_tests/signal_tests/test_fir_filter_design.py::TestFirls::test_firls', + '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_iir_filter_design.py::TestButtord::test_ellip_butter', + '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_iir_filter_design.py::TestEllipord::test_bandstop', + '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_iir_filter_design.py::TestEllipord::test_fs_param', + '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_iir_filter_design.py::TestEllipord::test_ellip_butter', + '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_ltisys.py::Test_bode::test_from_state_space', + '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_ltisys.py::TestPlacePoles::test_real_2', + '--deselect tests/cupyx_tests/' + 'scipy_tests/signal_tests/test_polyutils.py::TestPartialFractionExpansion::test_residuez_general', +] + +_parallel_tests = 4 # tests can require a lot of VRAM + +exts_list = [ + ('fastrlock', '0.8.2', { + 'checksums': ['644ec9215cf9c4df8028d8511379a15d9c1af3e16d80e47f1b6fdc6ba118356a'], + }), + ('cupy', version, { + 'preinstallopts': 'CUPY_NUM_BUILD_JOBS=%(parallel)s EB_CCC="%(cuda_cc_cmake)s" ', + 'patches': [ + 'cupy-%(version)s_cusparselt_0.6.0.patch', + 'cupy-%(version)s_eb_ccc.patch', + ], + 'runtest': 'export CUPY_TEST_GPU_LIMIT=1 CUPY_CACHE_DIR="%%(builddir)s" && ' + 'pytest -n %s tests -k "not slow" ' % _parallel_tests + ' '.join(_skip_tests), + 'testinstall': True, + 'checksums': [ + {'cupy-13.0.0.tar.gz': '2f04e7857f692a713360dc9c3b06709806ab8404fca39b5af9721c04a2979aae'}, + {'cupy-13.0.0_cusparselt_0.6.0.patch': '09cb12d26e78079c50b06f17002bf54c66e5e4743b917c5a218d3fe90124d499'}, + {'cupy-13.0.0_eb_ccc.patch': 'bfe8b46344759f58491f55418bd9c856d6f72d681ee5fef12820009f808d2db1'}, + ], + }), +] + +sanity_check_commands = [ + "python -c 'import cupy'", +] + +sanity_pip_check = True + +moduleclass = 'lib' diff --git 
a/easybuild/easyconfigs/c/CuPy/cupy-13.0.0_cusparselt_0.6.0.patch b/easybuild/easyconfigs/c/CuPy/cupy-13.0.0_cusparselt_0.6.0.patch new file mode 100644 index 000000000000..c202cd34a6c3 --- /dev/null +++ b/easybuild/easyconfigs/c/CuPy/cupy-13.0.0_cusparselt_0.6.0.patch @@ -0,0 +1,334 @@ +Support for cusparselt 0.6.0 adapted from https://github.com/cupy/cupy/pull/8074 +but with bug fixed for compressed size functions that were wrong. +author: micketeer@gmail.com +diff -ru cupy_backends.orig/cuda/libs/cusparselt.pxd cupy_backends/cuda/libs/cusparselt.pxd +--- cupy_backends.orig/cuda/libs/cusparselt.pxd 2024-03-23 17:08:35.676189389 +0100 ++++ cupy_backends/cuda/libs/cusparselt.pxd 2024-03-23 17:30:34.931591841 +0100 +@@ -13,18 +13,20 @@ + CUSPARSELT_MAT_BATCH_STRIDE = 1 # READ/WRITE + + # cusparseComputeType +- CUSPARSE_COMPUTE_16F = 0 +- CUSPARSE_COMPUTE_32I = 1 +- CUSPARSE_COMPUTE_TF32 = 2 +- CUSPARSE_COMPUTE_TF32_FAST = 3 ++ CUSPARSE_COMPUTE_32I = 0 ++ CUSPARSE_COMPUTE_16F = 1 ++ CUSPARSE_COMPUTE_32F = 2 + + # cusparseLtMatmulDescAttribute_t + CUSPARSELT_MATMUL_ACTIVATION_RELU = 0 # READ/WRITE + CUSPARSELT_MATMUL_ACTIVATION_RELU_UPPERBOUND = 1 # READ/WRITE + CUSPARSELT_MATMUL_ACTIVATION_RELU_THRESHOLD = 2 # READ/WRITE + CUSPARSELT_MATMUL_ACTIVATION_GELU = 3 # READ/WRITE +- CUSPARSELT_MATMUL_BIAS_STRIDE = 4 # READ/WRITE +- CUSPARSELT_MATMUL_BIAS_POINTER = 5 # READ/WRITE ++ CUSPARSELT_MATMUL_ACTIVATION_GELU_SCALING = 4 # READ/WRITE ++ CUSPARSELT_MATMUL_ALPHA_VECTOR_SCALING = 5 # READ/WRITE ++ CUSPARSELT_MATMUL_BETA_VECTOR_SCALING = 6 # READ/WRITE ++ CUSPARSELT_MATMUL_BIAS_POINTER = 7 # READ/WRITE ++ CUSPARSELT_MATMUL_BIAS_STRIDE = 8 # READ/WRITE + + # cusparseLtMatmulAlg_t + CUSPARSELT_MATMUL_ALG_DEFAULT = 0 +@@ -33,6 +35,14 @@ + CUSPARSELT_MATMUL_ALG_CONFIG_ID = 0 # NOQA, READ/WRITE + CUSPARSELT_MATMUL_ALG_CONFIG_MAX_ID = 1 # NOQA, READ-ONLY + CUSPARSELT_MATMUL_SEARCH_ITERATIONS = 2 # NOQA, READ/WRITE ++ CUSPARSELT_MATMUL_SPLIT_K = 3 # NOQA, READ/WRITE ++ 
CUSPARSELT_MATMUL_SPLIT_K_MODE =4 # NOQA, READ/WRITE ++ CUSPARSELT_MATMUL_SPLIT_K_BUFFERS=5 # NOQA, READ/WRITE ++ ++ # cusparseLtSplitKMode_t ++ CUSPARSELT_INVALID_MODE = 0 ++ CUSPARSELT_SPLIT_K_MODE_ONE_KERNEL = 1 ++ CUSPARSELT_SPLIT_K_MODE_TWO_KERNELS = 2 + + # cusparseLtPruneAlg_t + CUSPARSELT_PRUNE_SPMMA_TILE = 0 +diff -ru cupy_backends.orig/cuda/libs/cusparselt.pyx cupy_backends/cuda/libs/cusparselt.pyx +--- cupy_backends.orig/cuda/libs/cusparselt.pyx 2024-03-23 17:08:35.676189389 +0100 ++++ cupy_backends/cuda/libs/cusparselt.pyx 2024-03-23 18:03:43.706215397 +0100 +@@ -11,6 +11,13 @@ + from cupy_backends.cuda.libs import cusparse as _cusparse + + ++############################################################################### ++# Types ++############################################################################### ++cdef extern from *: ++ ctypedef void* LibraryPropertyType 'libraryPropertyType_t' ++ ++ + cdef extern from '../../cupy_cusparselt.h' nogil: + ctypedef int cusparseStatus_t 'cusparseStatus_t' + ctypedef int cusparseOrder_t 'cusparseOrder_t' +@@ -36,11 +43,14 @@ + ctypedef int cusparseOperation_t 'cusparseOperation_t' + ctypedef int cusparseLtMatmulAlg_t 'cusparseLtMatmulAlg_t' + ctypedef int cusparseLtMatmulAlgAttribute_t 'cusparseLtMatmulAlgAttribute_t' # NOQA ++ ctypedef int cusparseLtSplitKMode_t 'cusparseLtSplitKMode_t' + ctypedef int cusparseLtPruneAlg_t 'cusparseLtPruneAlg_t' + + # Management Functions + cusparseStatus_t cusparseLtInit(cusparseLtHandle_t* handle) + cusparseStatus_t cusparseLtDestroy(const cusparseLtHandle_t* handle) ++ cusparseStatus_t cusparseLtGetVersion(const cusparseLtHandle_t* handle, int* version) ++ cusparseStatus_t cusparseLtGetProperty(LibraryPropertyType propertyType, int* value) + + # Matmul Functions + cusparseStatus_t cusparseLtDenseDescriptorInit( +@@ -66,6 +76,11 @@ + const cusparseLtMatDescriptor_t* matDescr, + cusparseLtMatDescAttribute_t matAttribute, + void* data, size_t dataSize) ++ cusparseStatus_t 
cusparseLtMatDescSetAttribute( ++ const cusparseLtHandle_t* handle, ++ const cusparseLtMatDescriptor_t* matDescr, ++ cusparseLtMatDescAttribute_t matAttribute, ++ void* data, size_t dataSize) + cusparseStatus_t cusparseLtMatmulDescriptorInit( + const cusparseLtHandle_t* handle, + cusparseLtMatmulDescriptor_t* matMulDescr, +@@ -95,17 +110,21 @@ + const cusparseLtHandle_t* handle, + cusparseLtMatmulAlgSelection_t* algSelection, + cusparseLtMatmulAlgAttribute_t attribute, +- const void* data, size_t ataSize) +- cusparseStatus_t cusparseLtMatmulGetWorkspace( ++ const void* data, size_t dataSize) ++ cusparseStatus_t cusparseLtMatmulAlgGetAttribute( + const cusparseLtHandle_t* handle, + const cusparseLtMatmulAlgSelection_t* algSelection, ++ cusparseLtMatmulAlgAttribute_t attribute, ++ void* data, size_t dataSize) ++ cusparseStatus_t cusparseLtMatmulGetWorkspace( ++ const cusparseLtHandle_t* handle, ++ const cusparseLtMatmulPlan_t* plan, + size_t* workspaceSize) + cusparseStatus_t cusparseLtMatmulPlanInit( + const cusparseLtHandle_t* handle, + cusparseLtMatmulPlan_t* plan, + const cusparseLtMatmulDescriptor_t* matmulDescr, +- const cusparseLtMatmulAlgSelection_t* algSelection, +- size_t workspaceSize) ++ const cusparseLtMatmulAlgSelection_t* algSelection) + cusparseStatus_t cusparseLtMatmulPlanDestroy( + const cusparseLtMatmulPlan_t* plan) + cusparseStatus_t cusparseLtMatmul( +@@ -113,6 +132,11 @@ + const void* alpha, const void* d_A, const void* d_B, + const void* beta, const void* d_C, void* d_D, + void* workspace, runtime.Stream* streams, int32_t numStreams) ++ cusparseStatus_t cusparseLtMatmulSearch( ++ const cusparseLtHandle_t* handle, cusparseLtMatmulPlan_t* plan, ++ const void* alpha, const void* d_A, const void* d_B, ++ const void* beta, const void* d_C, void* d_D, ++ void* workspace, runtime.Stream* streams, int32_t numStreams) + + # Helper Functions + cusparseStatus_t cusparseLtSpMMAPrune( +@@ -123,7 +147,7 @@ + cusparseStatus_t cusparseLtSpMMAPruneCheck( + 
const cusparseLtHandle_t* handle, + const cusparseLtMatmulDescriptor_t* matmulDescr, +- const void* d_in, int* valid, runtime.Stream stream) ++ const void* d_in, int* d_valid, runtime.Stream stream) + cusparseStatus_t cusparseLtSpMMAPrune2( + const cusparseLtHandle_t* handle, + const cusparseLtMatDescriptor_t* sparseMatDescr, +@@ -136,19 +160,22 @@ + runtime.Stream stream) + cusparseStatus_t cusparseLtSpMMACompressedSize( + const cusparseLtHandle_t* handle, const cusparseLtMatmulPlan_t* plan, +- size_t* compressedSize) ++ size_t* compressedSize, ++ size_t* compressedBufferSize) + cusparseStatus_t cusparseLtSpMMACompress( + const cusparseLtHandle_t* handle, const cusparseLtMatmulPlan_t* plan, +- const void* d_dense, void* d_compressed, runtime.Stream stream) ++ const void* d_dense, void* d_compressed, void* d_compressed_buffer, ++ runtime.Stream stream) + cusparseStatus_t cusparseLtSpMMACompressedSize2( + const cusparseLtHandle_t* handle, + const cusparseLtMatDescriptor_t* sparseMatDescr, +- size_t* compressedSize) ++ size_t* compressedSize, ++ size_t* compressedBufferSize) + cusparseStatus_t cusparseLtSpMMACompress2( + const cusparseLtHandle_t* handle, + const cusparseLtMatDescriptor_t* sparseMatDescr, + int isSparseA, cusparseOperation_t op, const void* d_dense, +- void* d_compressed, runtime.Stream stream) ++ void* d_compressed, void* d_compressed_buffer, runtime.Stream stream) + + # Build-time version + int CUSPARSELT_VERSION +@@ -370,28 +397,36 @@ + data, dataSize) + check_status(status) + ++cpdef matmulAlgGetAttribute(Handle handle, MatmulAlgSelection algSelection, ++ attribute, size_t data, size_t dataSize): ++ """Gets the attribute related to algorithm selection descriptor.""" ++ status = cusparseLtMatmulAlgGetAttribute( ++ handle._ptr, ++ algSelection._ptr, ++ attribute, ++ data, dataSize) ++ check_status(status) ++ + cpdef size_t matmulGetWorkspace(Handle handle, +- MatmulAlgSelection algSelection): ++ MatmulPlan plan): + """Determines the required 
workspace size""" + cdef size_t workspaceSize + status = cusparseLtMatmulGetWorkspace( + handle._ptr, +- algSelection._ptr, ++ plan._ptr, + &workspaceSize) + check_status(status) + return workspaceSize + + cpdef matmulPlanInit(Handle handle, MatmulPlan plan, + MatmulDescriptor matmulDescr, +- MatmulAlgSelection algSelection, +- size_t workspaceSize): ++ MatmulAlgSelection algSelection): + """Initializes the plan.""" + status = cusparseLtMatmulPlanInit( + handle._ptr, + plan._ptr, + matmulDescr._ptr, +- algSelection._ptr, +- workspaceSize) ++ algSelection._ptr) + check_status(status) + + cpdef matmulPlanDestroy(MatmulPlan plan): +@@ -412,6 +447,18 @@ + workspace, NULL, 0) + check_status(status) + ++cpdef matmulSearch(Handle handle, MatmulPlan plan, ++ size_t alpha, size_t d_A, size_t d_B, ++ size_t beta, size_t d_C, size_t d_D, size_t workspace): ++ """Evaluates all available algorithms for the matrix multiplication""" ++ status = cusparseLtMatmulSearch( ++ handle._ptr, ++ plan._ptr, ++ alpha, d_A, d_B, ++ beta, d_C, d_D, ++ workspace, NULL, 0) ++ check_status(status) ++ + ############################################################################### + # cuSPARSELt: Helper Functions + ############################################################################### +@@ -428,13 +475,13 @@ + check_status(status) + + cpdef spMMAPruneCheck(Handle handle, MatmulDescriptor matmulDescr, +- size_t d_in, size_t valid): ++ size_t d_in, size_t d_valid): + """Checks the correctness of the pruning structure""" + cdef intptr_t stream = stream_module.get_current_stream_ptr() + status = cusparseLtSpMMAPruneCheck( + handle._ptr, + matmulDescr._ptr, +- d_in, valid, stream) ++ d_in, d_valid, stream) + check_status(status) + + cpdef spMMAPrune2(Handle handle, MatDescriptor sparseMatDescr, isSparseA, +@@ -460,47 +507,52 @@ + d_valid, stream) + check_status(status) + +-cpdef size_t spMMACompressedSize(Handle handle, MatmulPlan plan): ++cpdef spMMACompressedSize(Handle handle, MatmulPlan 
plan): + """Provides the size of the compressed matrix""" + cdef size_t compressedSize ++ cdef size_t compressedBufferSize + status = cusparseLtSpMMACompressedSize( + handle._ptr, + plan._ptr, +- &compressedSize) ++ &compressedSize, &compressedBufferSize) + check_status(status) +- return compressedSize ++ return compressedSize, compressedBufferSize + + cpdef spMMACompress(Handle handle, MatmulPlan plan, +- size_t d_dense, size_t d_compressed): ++ size_t d_dense, size_t d_compressed, ++ size_t d_compressed_buffer): + """Compresses a dense matrix d_dense.""" + cdef intptr_t stream = stream_module.get_current_stream_ptr() + status = cusparseLtSpMMACompress( + handle._ptr, + plan._ptr, +- d_dense, d_compressed, stream) ++ d_dense, d_compressed, ++ d_compressed_buffer, stream) + check_status(status) + +-cpdef size_t spMMACompressedSize2(Handle handle, MatDescriptor sparseMatDescr): ++cpdef spMMACompressedSize2(Handle handle, MatDescriptor sparseMatDescr): + """Provides the size of the compressed matrix""" + cdef size_t compressedSize ++ cdef size_t compressedBufferSize + status = cusparseLtSpMMACompressedSize2( + handle._ptr, + sparseMatDescr._ptr, +- &compressedSize) ++ &compressedSize, &compressedBufferSize) + check_status(status) +- return compressedSize ++ return compressedSize, compressedBufferSize + + cpdef spMMACompress2(Handle handle, MatDescriptor sparseMatDescr, +- isSparseA, op, size_t d_dense, size_t d_compressed): ++ isSparseA, op, size_t d_dense, ++ size_t d_compressed, size_t d_compressed_buffer): + """Compresses a dense matrix d_dense.""" + cdef intptr_t stream = stream_module.get_current_stream_ptr() + status = cusparseLtSpMMACompress2( + handle._ptr, + sparseMatDescr._ptr, + isSparseA, op, d_dense, +- d_compressed, stream) ++ d_compressed, d_compressed_buffer, ++ stream) + check_status(status) + +- + def get_build_version(): + return CUSPARSELT_VERSION +diff -ru cupy_backends.orig/stub/cupy_cusparselt.h cupy_backends/stub/cupy_cusparselt.h +--- 
cupy_backends.orig/stub/cupy_cusparselt.h 2024-03-23 17:08:35.677189397 +0100 ++++ cupy_backends/stub/cupy_cusparselt.h 2024-03-23 18:15:04.734544642 +0100 +@@ -19,6 +19,7 @@ + typedef enum {} cusparseLtMatmulDescAttribute_t; + typedef enum {} cusparseLtMatmulAlg_t; + typedef enum {} cusparseLtMatmulAlgAttribute_t; ++typedef enum {} cusparseLtSplitKMode_t; + typedef enum {} cusparseLtPruneAlg_t; + + typedef void* cudaStream_t; +@@ -36,6 +37,14 @@ + return CUSPARSE_STATUS_SUCCESS; + } + ++cusparseStatus_t cusparseLtGetVersion(...) { ++ return CUSPARSE_STATUS_SUCCESS; ++} ++ ++cusparseStatus_t cusparseLtGetProperty(...) { ++ return CUSPARSE_STATUS_SUCCESS; ++} ++ + cusparseStatus_t cusparseLtDenseDescriptorInit(...) { + return CUSPARSE_STATUS_SUCCESS; + } diff --git a/easybuild/easyconfigs/c/CuPy/cupy-13.0.0_eb_ccc.patch b/easybuild/easyconfigs/c/CuPy/cupy-13.0.0_eb_ccc.patch new file mode 100644 index 000000000000..3a2be87de98f --- /dev/null +++ b/easybuild/easyconfigs/c/CuPy/cupy-13.0.0_eb_ccc.patch @@ -0,0 +1,16 @@ +Pick up which cuda compute capabilities to use from the environment variable +EB_CCC in the standard format "70;75;80" +author: micketeer@gmail.com +--- install/cupy_builder/_compiler.py.orig 2024-03-24 01:09:26.501631534 +0000 ++++ install/cupy_builder/_compiler.py 2024-03-24 01:10:28.550644001 +0000 +@@ -21,6 +21,10 @@ + if sys.argv == ['setup.py', 'develop']: + return [] + ++ envcfg = os.getenv('EB_CCC', None) ++ if envcfg is not None: ++ return [f'--generate-code=arch=compute_{cc},code=sm_{cc}' for cc in envcfg.split(';')] ++ + envcfg = os.getenv('CUPY_NVCC_GENERATE_CODE', None) + if envcfg is not None and envcfg != 'current': + return ['--generate-code={}'.format(arch)