diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 7bc5e5f..7081eb7 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -34,7 +34,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -45,7 +45,7 @@ jobs: if: ${{ !startsWith(github.ref, 'refs/tags/') }} needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: if: ${{ startsWith(github.ref, 'refs/tags/') }} needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a34fa55..d700f58 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -14,7 +14,7 @@ jobs: - devcontainer - telemetry-setup secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10 telemetry-setup: runs-on: ubuntu-latest continue-on-error: true @@ -29,20 +29,20 @@ jobs: checks: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10 with: ignored_pr_jobs: telemetry-summarize conda-cpp-build: needs: checks secrets: 
inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 with: build_type: pull-request script: ci/build_cpp.sh devcontainer: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.10 with: arch: '["amd64", "arm64"]' cuda: '["13.0"]' diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 72751d0..48bf37a 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.10 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} diff --git a/README.md b/README.md index 603117e..4ebcd1d 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # cuMLPrims -This repository contains C++ and CUDA code of muti-node multi-GPU (MNMG) ML mathematical primitives and some algorithms, that are used by [the main cuML project](https://github.com/rapidsai/cuml). The build system uses CMake for build configuration, and an out-of-source build is recommended. +This repository contains C++ and CUDA code of multi-node multi-GPU (MNMG) ML mathematical primitives and some algorithms, that are used by [the main cuML project](https://github.com/rapidsai/cuml). The build system uses CMake for build configuration, and an out-of-source build is recommended. 
-As of version 0.13, the MNMG code included in cuMLPrims follows the model one-process-per-GPU (OPG), where the code uses a communication library (based on cuML's comms) and each process has one GPU assigned to it. This is in contrast to single-process-multi-GPU (SPMG) approaches, which are no longer part of the code base. +The MNMG code included in cuMLPrims follows the model one-process-per-GPU (OPG), where the code uses a communication library (based on cuML's comms) and each process has one GPU assigned to it. This is in contrast to single-process-multi-GPU (SPMG) approaches, which are no longer part of the code base. ## Folder Structure @@ -14,7 +14,7 @@ The folder structure mirrors closely the structure of github cuML. The folders a - `cpp/cmake`: CMake related scripts. - `cpp/include`: The include folder for headers that are necessary to be installed/distributed to use the libcumlprims.so artifact by users of the library. - `cpp/src_prims_opg`: Contains source code for MNMG ML primitives. It also contains source code for algorithms that use the primitives that are still included in cuMLPrims as opposed to cuML. - - `cpp/test`: Googletest based unit tests (work in progress). + - `cpp/test`: Googletest based unit tests. ## Building cuMLPrims: @@ -22,22 +22,20 @@ The folder structure mirrors closely the structure of github cuML. The folders a The main artifact produced by the build system is the shared library libcumlprims. Ensure the following dependencies are satisfied: -1. CMake (>= 3.14.5) -2. CUDA (>= 10.0) -3. GCC (>= 5.4.0) -4. NCCL (>= 2.4.6.1) -5. UCX with CUDA support (optional)(>=1.7) - enables point-to-point messaging in the cuML communicator. -6. zlib +1. CMake (>= 3.30.4) +2. CUDA (>= 12.0) +3. GCC (>= 14) +4. NCCL (>= 2.5) -It is recommended to use conda for environment/package management. 
If doing so, it is recommended to use the convenience environment .yml file located in [**the cuML repository**, in `conda/environments/cuml_dev_cudax.y.yml` (replace x.y for your CUDA version)](https://github.com/rapidsai/cuml/tree/branch-0.14/conda/environments). This file contains most of the dependencies ment1ioned above (notable exceptions are gcc and zlib). To use it, for example to create an environment named cuml_dev for CUDA 10.0 and Python 3.7, you can use the follow command: +It is recommended to use conda for environment/package management. See `conda/environments/` for available environment files. ```bash -conda env create -n cuml_dev python=3.7 --file=conda/environments/cuml_dev_cuda10.0.yml +conda env create --name cumlprims_dev --file conda/environments/all_cuda-130_arch-$(arch).yaml ``` ### Using build.sh script -As a convenience, a `build.sh` script is provided which can be used to execute the build commands in an automated manner. Note that the libraries will be installed to the location set in `$INSTALL_PREFIX` if set (i.e. `export INSTALL_PREFIX=/install/path`), otherwise to `$CONDA_PREFIX`. +As a convenience, a `build.sh` script is provided which can be used to execute the build commands in an automated manner. Note that the libraries will be installed to the location set in `$INSTALL_PREFIX` if set (i.e. `export INSTALL_PREFIX=/install/path`), otherwise to `$CONDA_PREFIX`. 
```bash $ ./build.sh # build the libcuml library and tests # install them to $INSTALL_PREFIX if set, otherwise $CONDA_PREFIX @@ -46,12 +44,12 @@ $ ./build.sh # build the libcuml library and tests Other `build.sh` options: ```bash -$ ./build.sh clean # remove any prior build artifacts and configuration (start over) +$ ./build.sh clean # remove any prior build artifacts and configuration (start over) $ ./build.sh libcumlprims -v # build and install libcumlprims with verbose output $ ./build.sh libcumlprims -g # build and install libcumlprims for debug $ PARALLEL_LEVEL=4 ./build.sh libcumlprims # build and install libcumlprims limiting parallel build jobs to 4 (make -j4) -$ ./build.sh libcuml -n # build libcuml but do not install -$ ./build.sh libcumlprims --allgpuarch # build the tests for all supported GPU architectures +$ ./build.sh libcuml -n # build libcuml but do not install +$ ./build.sh libcumlprims --allgpuarch # build the tests for all supported GPU architectures ``` @@ -107,11 +105,7 @@ cuMLPrims CMake has the following configurable flags available: | Flag | Possible Values | Default Value | Behavior | | --- | --- | --- | --- | -| BUILD_OPG_TESTS | [ON, OFF] | OFF | Build MPI cumlcomms based C++ unit tests (in progress, refer to step 4. of the build steps). | -| BUILD_CUMLPRIMS_LIBRARY | [ON, OFF] | ON | Enable/disable building libcumprims shared library. | -| CMAKE_CXX11_ABI | [ON, OFF] | ON | Enable/disable the GLIBCXX11 ABI | -| DISABLE_OPENMP | [ON, OFF] | OFF | Set to `ON` to disable OpenMP | -| KERNEL_INFO | [ON, OFF] | OFF | Enable/disable kernel resource usage info in nvcc. | -| LINE_INFO | [ON, OFF] | OFF | Enable/disable lineinfo in nvcc. | -| NVTX | [ON, OFF] | OFF | Enable/disable nvtx markers in libcumlprims. | -| GPU_ARCHS | List of GPU architectures, semicolon-separated | 60;70;75 | List of GPU architectures that all artifacts are compiled for. 
| +| BUILD_OPG_TESTS | [ON, OFF] | OFF | Build MPI cumlcomms based C++ unit tests (in progress, refer to step 4. of the build steps). | +| BUILD_CUMLPRIMS_LIBRARY | [ON, OFF] | ON | Enable/disable building libcumlprims shared library. | +| DISABLE_OPENMP | [ON, OFF] | OFF | Set to `ON` to disable OpenMP | +| NVTX | [ON, OFF] | OFF | Enable/disable nvtx markers in libcumlprims. |