From a61f0d22be001f71af048bc8b05d0e495b7a4ec9 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 20 Jan 2025 14:20:34 -0600 Subject: [PATCH 1/2] Add configuration for whitespace normalization. --- .pre-commit-config.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8447183ca1..8ce60fe6f4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,12 @@ --- -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer - repo: https://github.com/psf/black rev: 22.10.0 hooks: From 7e789bbc44753b32e1a56fb69cddf6a2ad978124 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 20 Jan 2025 14:22:24 -0600 Subject: [PATCH 2/2] Apply pre-commit. --- .github/labeler.yml | 3 +-- LICENSE | 2 +- ci/checks/black_lists.sh | 4 ++-- codecov.yml | 2 +- cpp/.clang-tidy | 3 +-- cpp/CMakeLists.txt | 2 +- cpp/cmake/modules/ConfigureAlgorithms.cmake | 3 +-- cpp/examples/symreg/README.md | 12 +++++----- .../cuml/common/pinned_host_vector.hpp | 4 ++-- cpp/include/cuml/experimental/fil/README.md | 4 ++-- cpp/include/cuml/solvers/params.hpp | 4 ++-- cpp/include/cuml/tsa/holtwinters_params.h | 4 ++-- cpp/src/dbscan/vertexdeg/pack.h | 4 ++-- .../decisiontree/batched-levelalgo/bins.cuh | 4 ++-- cpp/src/genetic/constants.h | 4 ++-- cpp/src/glm/qn/mg/glm_base_mg.cuh | 4 ++-- cpp/src/glm/qn/mg/standardization.cuh | 4 ++-- cpp/src/hdbscan/detail/kernels/membership.cuh | 4 ++-- .../hdbscan/detail/kernels/stabilities.cuh | 4 ++-- cpp/src/hdbscan/detail/predict.cuh | 4 ++-- cpp/src/tsne/kluger_lab_license.txt | 2 +- cpp/src_prims/datasets/boston.h | 4 ++-- cpp/src_prims/datasets/breast_cancer.h | 4 ++-- cpp/src_prims/datasets/diabetes.h | 4 ++-- cpp/test/c_api/README.md | 2 +- cpp/test/mg/kmeans_test.cu | 4 ++-- cpp/test/sg/handle_test.cu | 4 ++-- cpp/test/sg/hdbscan_inputs.hpp | 4 ++-- docs/source/_static/references.css | 2 +- docs/source/api.rst | 2 +- docs/source/cuml_blogs.rst | 1 - docs/source/cuml_intro.rst | 2 +- docs/source/user_guide.rst | 1 - notebooks/README.md | 2 +- .../data/time_series/population_estimate.csv | 2 +- notebooks/random_forest_demo.ipynb | 2 +- print_env.sh | 15 +++++++------ python/cuml/.coveragerc | 2 +- python/cuml/README.md | 2 -- .../cuml/cuml/_thirdparty/sklearn/README.md | 2 +- .../tests/ts_datasets/population_estimate.csv | 2 +- thirdparty/LICENSES/LICENSE.H2O4GPU | 2 +- thirdparty/LICENSES/LICENSE.faiss | 2 +- wiki/DEFINITION_OF_DONE_CRITERIA.md | 14 ++++++------ wiki/README.md | 4 ++-- wiki/mnmg/Using_Infiniband_for_MNMG.md | 22 +++++++++---------- 46 files changed, 90 insertions(+), 98 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index bc1c15661f..b7146a1bdc 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -5,7 +5,7 @@ Cython / Python: - 'python/**' - 'notebooks/**' - + CUDA/C++: - 'cpp/**' @@ -18,4 +18,3 @@ ci: conda: - 'conda/**' - diff --git a/LICENSE b/LICENSE index 4b54edd235..3ba63d53f4 100644 --- a/LICENSE +++ b/LICENSE @@ -187,7 +187,7 @@ identification within third-party archives. Copyright 2018 NVIDIA CORPORATION - + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at diff --git a/ci/checks/black_lists.sh b/ci/checks/black_lists.sh index 85435cf856..6300ab359e 100755 --- a/ci/checks/black_lists.sh +++ b/ci/checks/black_lists.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. ########################################## # cuML black listed function call Tester # ########################################## @@ -40,7 +40,7 @@ done for cond_black_listed in cudaMemcpy cudaMemset; do TMP=`git --no-pager diff --ignore-submodules -w --minimal -U0 -S"$cond_black_listed" $PR_TARGET_BRANCH | grep '^+' | grep -v '^+++' | grep -P "$cond_black_listed(?!Async)"` - + if [ "$TMP" != "" ]; then for filename in `git --no-pager diff --ignore-submodules -w --minimal --name-only -S"$cond_black_listed" $PR_TARGET_BRANCH`; do basefilename=$(basename -- "$filename") diff --git a/codecov.yml b/codecov.yml index 5e6b1fbbb3..038c75e2a0 100644 --- a/codecov.yml +++ b/codecov.yml @@ -11,4 +11,4 @@ comment: # undocumented option: # https://community.codecov.io/t/unable-to-determine-a-parent-commit-to-compare-against-in-base-branch-after-squash-and-merge/2480/15 codecov: - allow_coverage_offsets: true \ No newline at end of file + allow_coverage_offsets: true diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy index fea01ccea0..37651f0c26 100644 --- a/cpp/.clang-tidy +++ b/cpp/.clang-tidy @@ -4,7 +4,7 @@ WarningsAsErrors: '*' HeaderFilterRegex: '' AnalyzeTemporaryDtors: false FormatStyle: none -CheckOptions: +CheckOptions: - key: cert-dcl16-c.NewSuffixes value: 'L;LL;LU;LLU' - key: cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic @@ -142,4 +142,3 @@ CheckOptions: - key: readability-identifier-naming.TypedefSuffix value: '' ... - diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 118f3f0e28..b7ef6e2293 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -115,7 +115,7 @@ message(VERBOSE "CUML_CPP: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'.") set(LIBCUML_LOGGING_LEVEL "DEBUG" CACHE STRING "Choose the logging level." -) +) set_property( CACHE LIBCUML_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" "OFF" diff --git a/cpp/cmake/modules/ConfigureAlgorithms.cmake b/cpp/cmake/modules/ConfigureAlgorithms.cmake index f93425405e..261c0a1ac2 100644 --- a/cpp/cmake/modules/ConfigureAlgorithms.cmake +++ b/cpp/cmake/modules/ConfigureAlgorithms.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -129,4 +129,3 @@ else() set(LINK_CUVS ON) endif() endif() - diff --git a/cpp/examples/symreg/README.md b/cpp/examples/symreg/README.md index 52581eb627..444e8e7c68 100644 --- a/cpp/examples/symreg/README.md +++ b/cpp/examples/symreg/README.md @@ -1,5 +1,5 @@ # symbolic regression -This subfolder contains an example on how perform symbolic regression in cuML (from C++) +This subfolder contains an example on how perform symbolic regression in cuML (from C++) There are two `CMakeLists.txt` in this folder: 1. `CMakeLists.txt` (default) which is included when building cuML 2. `CMakeLists_standalone.txt` as an example for a stand alone project linking to `libcuml.so` @@ -11,7 +11,7 @@ $ cmake .. -DCUML_LIBRARY_DIR=/path/to/directory/with/libcuml.so -DCUML_INCLUDE_ ``` Then build with `make` or `ninja` ``` -$ make +$ make Scanning dependencies of target raft [ 10%] Creating directories for 'raft' [ 20%] Performing download step (git clone) for 'raft' @@ -28,7 +28,7 @@ Scanning dependencies of target symreg_example [100%] Linking CUDA executable symreg_example [100%] Built target symreg_example ``` -`CMakeLists_standalone.txt` also loads a minimal set of header dependencies(namely [raft](https://github.com/rapidsai/raft) and [cub](https://github.com/NVIDIA/cub)) if they are not detected in the system. +`CMakeLists_standalone.txt` also loads a minimal set of header dependencies(namely [raft](https://github.com/rapidsai/raft) and [cub](https://github.com/NVIDIA/cub)) if they are not detected in the system. ## Run 1. Generate a toy training and test dataset @@ -53,7 +53,7 @@ $ ./symreg_example -n_cols 2 \ -stopping_criteria 0.01 \ -p_crossover 0.7 \ -p_subtree 0.1 \ - -p_hoist 0.05 \ + -p_hoist 0.05 \ -p_point 0.1 \ -parsimony_coefficient 0.01 ``` @@ -77,11 +77,11 @@ Finished training for 4 generations. Best AST equation :( add( sub( mult( X0, X0) , div( X1, X1) ) , sub( X1, mult( X1, X1) ) ) ) Training time = 626.658ms *************************************** -Beginning Inference on Test dataset... +Beginning Inference on Test dataset... Inference score on test set = 5.29271e-08 Inference time = 0.35248ms Some Predicted test values: -1.65061;-1.64081;-0.91711;-2.28976;-0.280688; Corresponding Actual test values: -1.65061;-1.64081;-0.91711;-2.28976;-0.280688; -``` \ No newline at end of file +``` diff --git a/cpp/include/cuml/common/pinned_host_vector.hpp b/cpp/include/cuml/common/pinned_host_vector.hpp index 768bcb3b4e..db49c9d635 100644 --- a/cpp/include/cuml/common/pinned_host_vector.hpp +++ b/cpp/include/cuml/common/pinned_host_vector.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,4 +61,4 @@ class pinned_host_vector { std::size_t size_; }; -} // namespace ML \ No newline at end of file +} // namespace ML diff --git a/cpp/include/cuml/experimental/fil/README.md b/cpp/include/cuml/experimental/fil/README.md index 48d4a4ab16..70ece00cb7 100644 --- a/cpp/include/cuml/experimental/fil/README.md +++ b/cpp/include/cuml/experimental/fil/README.md @@ -3,11 +3,11 @@ RAPIDS Forest Inference Library (FIL) provides accelerated inference for tree-based machine learning models. Unlike packages like XGBoost, LightGBM, or even Scikit-Learn/cuML's random forest implementations, FIL cannot be used to _train_ forest models. Instead, its goal is to speed up -inference using forest models trained by all of those packages. +inference using forest models trained by all of those packages. This directory contains an experimental new implementation of FIL which provides both CPU and GPU execution. Its GPU implementation also offers -improved performance relative to the existing implementation in many but not all cases. +improved performance relative to the existing implementation in many but not all cases. For Python usage information and more extensive information on parameter-tuning and other end-user functionality, check out diff --git a/cpp/include/cuml/solvers/params.hpp b/cpp/include/cuml/solvers/params.hpp index d32b4bc7ba..3be8603bbe 100644 --- a/cpp/include/cuml/solvers/params.hpp +++ b/cpp/include/cuml/solvers/params.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,4 +33,4 @@ enum loss_funct { enum penalty { NONE, L1, L2, ELASTICNET }; -}; // namespace ML \ No newline at end of file +}; // namespace ML diff --git a/cpp/include/cuml/tsa/holtwinters_params.h b/cpp/include/cuml/tsa/holtwinters_params.h index e896816164..c16fa74400 100644 --- a/cpp/include/cuml/tsa/holtwinters_params.h +++ b/cpp/include/cuml/tsa/holtwinters_params.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,4 +46,4 @@ struct OptimParams { enum Norm { L0, L1, L2, LINF }; -} // namespace ML \ No newline at end of file +} // namespace ML diff --git a/cpp/src/dbscan/vertexdeg/pack.h b/cpp/src/dbscan/vertexdeg/pack.h index e876050e0f..c6700a34ad 100644 --- a/cpp/src/dbscan/vertexdeg/pack.h +++ b/cpp/src/dbscan/vertexdeg/pack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,4 +65,4 @@ struct Pack { } // namespace VertexDeg } // namespace Dbscan -} // namespace ML \ No newline at end of file +} // namespace ML diff --git a/cpp/src/decisiontree/batched-levelalgo/bins.cuh b/cpp/src/decisiontree/batched-levelalgo/bins.cuh index 312c4f2b51..9e89bada62 100644 --- a/cpp/src/decisiontree/batched-levelalgo/bins.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/bins.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,4 +73,4 @@ struct AggregateBin { } }; } // namespace DT -} // namespace ML \ No newline at end of file +} // namespace ML diff --git a/cpp/src/genetic/constants.h b/cpp/src/genetic/constants.h index 5e793a6604..1934e022a7 100644 --- a/cpp/src/genetic/constants.h +++ b/cpp/src/genetic/constants.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,4 +28,4 @@ const int GENE_TPB = 256; const int MAX_STACK_SIZE = 20; } // namespace genetic -} // namespace cuml \ No newline at end of file +} // namespace cuml diff --git a/cpp/src/glm/qn/mg/glm_base_mg.cuh b/cpp/src/glm/qn/mg/glm_base_mg.cuh index 49c309c5a4..2884f75b15 100644 --- a/cpp/src/glm/qn/mg/glm_base_mg.cuh +++ b/cpp/src/glm/qn/mg/glm_base_mg.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -204,4 +204,4 @@ struct GLMWithDataMG : ML::GLM::detail::GLMWithData { }; }; // namespace opg }; // namespace GLM -}; // namespace ML \ No newline at end of file +}; // namespace ML diff --git a/cpp/src/glm/qn/mg/standardization.cuh b/cpp/src/glm/qn/mg/standardization.cuh index 4cd169f909..f0cc15cdba 100644 --- a/cpp/src/glm/qn/mg/standardization.cuh +++ b/cpp/src/glm/qn/mg/standardization.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -377,4 +377,4 @@ struct Standardizer { }; // namespace opg }; // namespace GLM -}; // namespace ML \ No newline at end of file +}; // namespace ML diff --git a/cpp/src/hdbscan/detail/kernels/membership.cuh b/cpp/src/hdbscan/detail/kernels/membership.cuh index b5bbf2a34e..8db7c15d64 100644 --- a/cpp/src/hdbscan/detail/kernels/membership.cuh +++ b/cpp/src/hdbscan/detail/kernels/membership.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,4 +70,4 @@ struct probabilities_functor { }; // namespace Membership }; // namespace detail }; // namespace HDBSCAN -}; // namespace ML \ No newline at end of file +}; // namespace ML diff --git a/cpp/src/hdbscan/detail/kernels/stabilities.cuh b/cpp/src/hdbscan/detail/kernels/stabilities.cuh index 7248457d1d..3e324f4289 100644 --- a/cpp/src/hdbscan/detail/kernels/stabilities.cuh +++ b/cpp/src/hdbscan/detail/kernels/stabilities.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,4 +61,4 @@ struct stabilities_functor { }; // namespace Stability }; // namespace detail }; // namespace HDBSCAN -}; // namespace ML \ No newline at end of file +}; // namespace ML diff --git a/cpp/src/hdbscan/detail/predict.cuh b/cpp/src/hdbscan/detail/predict.cuh index 9cbe5fea19..217afed3aa 100644 --- a/cpp/src/hdbscan/detail/predict.cuh +++ b/cpp/src/hdbscan/detail/predict.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -272,4 +272,4 @@ void approximate_predict(const raft::handle_t& handle, }; // end namespace Predict }; // end namespace detail }; // end namespace HDBSCAN -}; // end namespace ML \ No newline at end of file +}; // end namespace ML diff --git a/cpp/src/tsne/kluger_lab_license.txt b/cpp/src/tsne/kluger_lab_license.txt index 4a8d5f8a8e..90a858ccfd 100644 --- a/cpp/src/tsne/kluger_lab_license.txt +++ b/cpp/src/tsne/kluger_lab_license.txt @@ -132,4 +132,4 @@ General Public License. (e.g. they do not require you to accompany any object code using FFTW with the corresponding source code.) For these alternative terms you must purchase a license from MIT’s Technology Licensing Office. Users interested in such a license should contact us (fftw@fftw.org) for more -information. \ No newline at end of file +information. diff --git a/cpp/src_prims/datasets/boston.h b/cpp/src_prims/datasets/boston.h index acbd4db928..984bae0362 100644 --- a/cpp/src_prims/datasets/boston.h +++ b/cpp/src_prims/datasets/boston.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -535,4 +535,4 @@ static const int n_features = 13; } // namespace Boston } // namespace Datasets -} // namespace MLCommon \ No newline at end of file +} // namespace MLCommon diff --git a/cpp/src_prims/datasets/breast_cancer.h b/cpp/src_prims/datasets/breast_cancer.h index 9432a20701..4293e03d70 100644 --- a/cpp/src_prims/datasets/breast_cancer.h +++ b/cpp/src_prims/datasets/breast_cancer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1989,4 +1989,4 @@ static const int n_features = 30; } // namespace BreastCancer } // namespace Datasets -} // namespace MLCommon \ No newline at end of file +} // namespace MLCommon diff --git a/cpp/src_prims/datasets/diabetes.h b/cpp/src_prims/datasets/diabetes.h index a4983e1ff2..d13359e274 100644 --- a/cpp/src_prims/datasets/diabetes.h +++ b/cpp/src_prims/datasets/diabetes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1504,4 +1504,4 @@ static const int n_features = 10; } // namespace Diabetes } // namespace Datasets -} // namespace MLCommon \ No newline at end of file +} // namespace MLCommon diff --git a/cpp/test/c_api/README.md b/cpp/test/c_api/README.md index d0632dfa64..e23b509360 100644 --- a/cpp/test/c_api/README.md +++ b/cpp/test/c_api/README.md @@ -17,4 +17,4 @@ To help prevent accidentally including the C-API files when compiling `libcuml++ #error \ "This header is only for the C-API and should not be included from the C++ API." #endif -``` \ No newline at end of file +``` diff --git a/cpp/test/mg/kmeans_test.cu b/cpp/test/mg/kmeans_test.cu index 4ee5836f18..0d3732202e 100644 --- a/cpp/test/mg/kmeans_test.cu +++ b/cpp/test/mg/kmeans_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -203,4 +203,4 @@ INSTANTIATE_TEST_CASE_P(KmeansTests, KmeansTestF, ::testing::ValuesIn(inputsf2)) INSTANTIATE_TEST_CASE_P(KmeansTests, KmeansTestD, ::testing::ValuesIn(inputsd2)); -} // end namespace ML \ No newline at end of file +} // end namespace ML diff --git a/cpp/test/sg/handle_test.cu b/cpp/test/sg/handle_test.cu index 8bf87fa71e..3764c2fd02 100644 --- a/cpp/test/sg/handle_test.cu +++ b/cpp/test/sg/handle_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,4 +45,4 @@ TEST(HandleTest, DoubleDestoryFails) // handle is destroyed status = cumlDestroy(handle); EXPECT_EQ(CUML_INVALID_HANDLE, status); -} \ No newline at end of file +} diff --git a/cpp/test/sg/hdbscan_inputs.hpp b/cpp/test/sg/hdbscan_inputs.hpp index eaf37543ac..9933c6e8a9 100644 --- a/cpp/test/sg/hdbscan_inputs.hpp +++ b/cpp/test/sg/hdbscan_inputs.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -11560,4 +11560,4 @@ const std::vector> membership_vector_inputs = 0.003645882708951831, 0.0031043440103530884}}}; }; // namespace HDBSCAN -}; // namespace ML \ No newline at end of file +}; // namespace ML diff --git a/docs/source/_static/references.css b/docs/source/_static/references.css index 225cf13ba9..d1f647233a 100644 --- a/docs/source/_static/references.css +++ b/docs/source/_static/references.css @@ -20,4 +20,4 @@ dl.citation > dt.label > span::before { /* Add closing bracket */ dl.citation > dt.label > span::after { content: "]"; -} \ No newline at end of file +} diff --git a/docs/source/api.rst b/docs/source/api.rst index 44a29563f3..2def05ac72 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -52,7 +52,7 @@ cuML provides experimental support for running selected estimators and operators * - Regression and Classification - Ridge -If a CUDA-enabled GPU is available on the system, cuML will default to using it. Users can configure CPU or GPU execution for supported operators via context managers or global configuration. +If a CUDA-enabled GPU is available on the system, cuML will default to using it. Users can configure CPU or GPU execution for supported operators via context managers or global configuration. .. code-block:: python diff --git a/docs/source/cuml_blogs.rst b/docs/source/cuml_blogs.rst index 0df70746c1..c56334671f 100644 --- a/docs/source/cuml_blogs.rst +++ b/docs/source/cuml_blogs.rst @@ -28,4 +28,3 @@ Academic Papers --------------- * `Machine Learning in Python: Main developments and technology trends in data science, machine learning, and artificial intelligence (Sebastian Raschka, Joshua Patterson, Corey Nolet) `_ - diff --git a/docs/source/cuml_intro.rst b/docs/source/cuml_intro.rst index f78a472718..54b193b888 100644 --- a/docs/source/cuml_intro.rst +++ b/docs/source/cuml_intro.rst @@ -18,7 +18,7 @@ then call ``predict`` or ``transform`` for inference. .. code-block:: python import cuml.LinearRegression - + model = cuml.LinearRegression() model.fit(X_train, y) y_prediction = model.predict(X_test) diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst index 73f0f1d057..64c7705eb3 100644 --- a/docs/source/user_guide.rst +++ b/docs/source/user_guide.rst @@ -7,4 +7,3 @@ User Guide estimator_intro.ipynb pickling_cuml_models.ipynb execution_device_interoperability.ipynb - diff --git a/notebooks/README.md b/notebooks/README.md index a6b7e28461..39d40e4fd8 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -9,7 +9,7 @@ documentation tree. ## Additional notebooks Notebook Title | Status | Description ---- | --- | --- +--- | --- | --- [ARIMA Demo](arima_demo.ipynb) | Working | Forecast using ARIMA on time-series data. [Forest Inference Demo](forest_inference_demo.ipynb) | Working | Save and load an XGBoost model into FIL and infer on new data. [KMeans Demo](kmeans_demo.ipynb) | Working | Predict using k-means, visualize and compare the results with Scikit-learn's k-means. diff --git a/notebooks/data/time_series/population_estimate.csv b/notebooks/data/time_series/population_estimate.csv index 02e1be61ed..2c6afa6fac 100644 --- a/notebooks/data/time_series/population_estimate.csv +++ b/notebooks/data/time_series/population_estimate.csv @@ -135,4 +135,4 @@ 2008,2104.1,2187.4 2009,2134.0,2213.2 2010,2158.2,2234.9 -2011,2174.3,2248.4 \ No newline at end of file +2011,2174.3,2248.4 diff --git a/notebooks/random_forest_demo.ipynb b/notebooks/random_forest_demo.ipynb index a94b41cfe3..eb4e6e7c2b 100755 --- a/notebooks/random_forest_demo.ipynb +++ b/notebooks/random_forest_demo.ipynb @@ -291,4 +291,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/print_env.sh b/print_env.sh index db24245c73..0129213d6e 100755 --- a/print_env.sh +++ b/print_env.sh @@ -1,7 +1,8 @@ #!/usr/bin/env bash +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # Reports relevant environment information useful for diagnosing and # debugging cuML issues. -# Usage: +# Usage: # "./print_env.sh" - prints to stdout # "./print_env.sh > env.txt" - prints to file "env.txt" @@ -14,16 +15,16 @@ git submodule status --recursive else echo "Not inside a git repository" fi -echo +echo echo "***OS Information***" cat /etc/*-release uname -a -echo +echo echo "***GPU Information***" nvidia-smi -echo +echo echo "***CPU***" lscpu @@ -31,15 +32,15 @@ echo echo "***CMake***" which cmake && cmake --version -echo +echo echo "***g++***" which g++ && g++ --version -echo +echo echo "***nvcc***" which nvcc && nvcc --version -echo +echo echo "***Python***" which python && python -c "import sys; print('Python {0}.{1}.{2}'.format(sys.version_info[0], sys.version_info[1], sys.version_info[2]))" diff --git a/python/cuml/.coveragerc b/python/cuml/.coveragerc index c1a8b220e2..de753b8687 100644 --- a/python/cuml/.coveragerc +++ b/python/cuml/.coveragerc @@ -23,4 +23,4 @@ exclude_lines = # Don't complain if non-runnable code isn't run: if 0: - if False: \ No newline at end of file + if False: diff --git a/python/cuml/README.md b/python/cuml/README.md index 735e03ae2c..96729831d4 100644 --- a/python/cuml/README.md +++ b/python/cuml/README.md @@ -77,5 +77,3 @@ Packages required for multigpu algorithms*: ### Python Tests Python tests are based on the pytest library. To run them, from the `path_to_cuml/python/` folder, simply type `pytest`. - - diff --git a/python/cuml/cuml/_thirdparty/sklearn/README.md b/python/cuml/cuml/_thirdparty/sklearn/README.md index 38332cdcc1..cf5930e017 100644 --- a/python/cuml/cuml/_thirdparty/sklearn/README.md +++ b/python/cuml/cuml/_thirdparty/sklearn/README.md @@ -12,4 +12,4 @@ The code originates from the Scikit-Learn Github repository : https://github.com - Changes should be kept minimal, large portions of modified imported code should lie in the thirdparty_adapter directory - Only well-tested, reliable accelerated preprocessing functions should be exposed in cuml.preprocessing.__init__.py - Tests must be added for each exposed function - - Remember that a preprocessing model should always return the same datatype it received as input (NumPy, CuPy, Pandas, cuDF, Numba) \ No newline at end of file + - Remember that a preprocessing model should always return the same datatype it received as input (NumPy, CuPy, Pandas, cuDF, Numba) diff --git a/python/cuml/cuml/tests/ts_datasets/population_estimate.csv b/python/cuml/cuml/tests/ts_datasets/population_estimate.csv index 02e1be61ed..2c6afa6fac 100644 --- a/python/cuml/cuml/tests/ts_datasets/population_estimate.csv +++ b/python/cuml/cuml/tests/ts_datasets/population_estimate.csv @@ -135,4 +135,4 @@ 2008,2104.1,2187.4 2009,2134.0,2213.2 2010,2158.2,2234.9 -2011,2174.3,2248.4 \ No newline at end of file +2011,2174.3,2248.4 diff --git a/thirdparty/LICENSES/LICENSE.H2O4GPU b/thirdparty/LICENSES/LICENSE.H2O4GPU index c056706936..4935303b01 100644 --- a/thirdparty/LICENSES/LICENSE.H2O4GPU +++ b/thirdparty/LICENSES/LICENSE.H2O4GPU @@ -228,4 +228,4 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -``` \ No newline at end of file +``` diff --git a/thirdparty/LICENSES/LICENSE.faiss b/thirdparty/LICENSES/LICENSE.faiss index 87cbf536c6..b96dcb0480 100644 --- a/thirdparty/LICENSES/LICENSE.faiss +++ b/thirdparty/LICENSES/LICENSE.faiss @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/wiki/DEFINITION_OF_DONE_CRITERIA.md b/wiki/DEFINITION_OF_DONE_CRITERIA.md index 0dace8ae36..2c2cd399b9 100644 --- a/wiki/DEFINITION_OF_DONE_CRITERIA.md +++ b/wiki/DEFINITION_OF_DONE_CRITERIA.md @@ -6,8 +6,8 @@ Below is a quick and simple checklist for developers to determine whether an algorithm is complete and ready for release. Most of these items contain more detailed descriptions in their corresponding developer guide. The checklist is broken down by layer (C++ or Python) and categorized further into - **Design:** All algorithms should be designed with an eye on maintainability, performance, readability, and robustness. -- **Testing:** The goal for automated testing is to increase both the spread and the depth of code coverage as much as possible in order to ease time spent fixing bugs and developing new features. Additionally, a very important factor for a tool like `cuml` is to provide testing with multiple datasets that really stress the mathematical behavior of the algorithms. A comprehensive set of tests lowers the possibility for regressions and the introduction of bugs as the code evolves between versions. This covers both correctness & performance. -- **Documentation:** User-facing documentation should be complete and descriptive. Developer-facing documentation should be used for constructs which are complex and/or not immediately obvious. +- **Testing:** The goal for automated testing is to increase both the spread and the depth of code coverage as much as possible in order to ease time spent fixing bugs and developing new features. Additionally, a very important factor for a tool like `cuml` is to provide testing with multiple datasets that really stress the mathematical behavior of the algorithms. A comprehensive set of tests lowers the possibility for regressions and the introduction of bugs as the code evolves between versions. This covers both correctness & performance. +- **Documentation:** User-facing documentation should be complete and descriptive. Developer-facing documentation should be used for constructs which are complex and/or not immediately obvious. - **Performance:** Algorithms should be [benchmarked] and profiled regularly to spot potential bottlenecks, performance regressions, and memory problems. ### C++ @@ -38,12 +38,12 @@ Below is a quick and simple checklist for developers to determine whether an alg #### Design - Python class is as "near drop-in replacement" for Scikit-learn (or relevant industry standard) API as possible. This means parameters have the same names as Scikit-learn, and where differences exist, they are clearly documented in docstrings. -- It is recommended to open an initial PR with the API design if there are going to be significant differences with reference APIs, or lack of a reference API, to have a discussion about it. +- It is recommended to open an initial PR with the API design if there are going to be significant differences with reference APIs, or lack of a reference API, to have a discussion about it. - Python class is pickleable and a test has been added to `cuml/tests/test_pickle.py` - APIs use `input_to_cuml_array` to accept flexible inputs and check their datatypes and use `cumlArray.to_output()` to return configurable outputs. - Any internal parameters or array-based instance variables use `CumlArray` -#### Testing +#### Testing - Pytests for wrapper functionality against Scikit-learn using relevant datasets - Stress tests against reasonable inputs (e.g short-wide, tall-narrow, different numerical precision) @@ -60,7 +60,7 @@ Below is a quick and simple checklist for developers to determine whether an alg ## Review Checklist -Aside from the general algorithm expectations outlined in the checklists above, code reviewers should use the following checklist to make sure the algorithm meets cuML standards. +Aside from the general algorithm expectations outlined in the checklists above, code reviewers should use the following checklist to make sure the algorithm meets cuML standards. ### All @@ -71,7 +71,7 @@ Aside from the general algorithm expectations outlined in the checklists above, - Changes to the public API will not have a negative impact to existing users between minor versions (eg. large changes to very popular public APIs go through a deprecation cycle to preserve backwards compatibility) - Where it is reasonable to do so, unexpected inputs fail gracefully and provide actionable feedback to the user - Automated tests properly exercise the changes in the PR -- New algorithms provide benchmarks (both C++ and Python) +- New algorithms provide benchmarks (both C++ and Python) ### C++ @@ -80,4 +80,4 @@ Aside from the general algorithm expectations outlined in the checklists above, ### Python -- Look at the list of slowest PyTests printed in the CI logs and check that any newly committed PyTests are not going to have a significant impact on the end-to-end execution. \ No newline at end of file +- Look at the list of slowest PyTests printed in the CI logs and check that any newly committed PyTests are not going to have a significant impact on the end-to-end execution. diff --git a/wiki/README.md b/wiki/README.md index c735da3552..a31ff0c17a 100644 --- a/wiki/README.md +++ b/wiki/README.md @@ -3,7 +3,7 @@ This wiki is provided as an extension to cuML's public documentation, geared toward developers on the project. If you are interested in contributing to cuML, read through our [contributing guide](../CONTRIBUTING.md). You are -also encouraged to read through our Python [developer guide](python/DEVELOPER_GUIDE.md) and C++ +also encouraged to read through our Python [developer guide](python/DEVELOPER_GUIDE.md) and C++ [developer guide](cpp/DEVELOPER_GUIDE.md) to gain an understanding for how we design our algorithms. -We have criteria for defining our [definition of done](DEFINITION_OF_DONE_CRITERIA.md) to allow us to provide high performance, maintainable and overall high quality implementations, while giving as much transparency as possible about the status of our algorithms with our users. \ No newline at end of file +We have criteria for defining our [definition of done](DEFINITION_OF_DONE_CRITERIA.md) to allow us to provide high performance, maintainable and overall high quality implementations, while giving as much transparency as possible about the status of our algorithms with our users. diff --git a/wiki/mnmg/Using_Infiniband_for_MNMG.md b/wiki/mnmg/Using_Infiniband_for_MNMG.md index 4dcad64005..0f9ad3613d 100644 --- a/wiki/mnmg/Using_Infiniband_for_MNMG.md +++ b/wiki/mnmg/Using_Infiniband_for_MNMG.md @@ -4,7 +4,7 @@ These instructions outline how to run multi-node multi-GPU cuML on devices with The steps in this wiki post have been largely adapted from the [Experiments in High Performance Networking with UCX and DGX](https://blog.dask.org/2019/06/09/ucx-dgx) blog by Matthew Rocklin and Rick Zamora. -## 1. Install UCX +## 1. Install UCX ### From Conda @@ -19,7 +19,7 @@ Install autogen if it's not already installed: sudo apt-get install autogen autoconf libtool ``` -Optionally install `gdrcopy` for faster GPU-Network card data transfer: +Optionally install `gdrcopy` for faster GPU-Network card data transfer: From the [ucx wiki](https://github.com/openucx/ucx/wiki/NVIDIA-GPU-Support), `gdrcopy` can be installed, and might be necessary, to enable faster GPU-Network card data transfer. @@ -160,7 +160,7 @@ If you configured UCX with the `gdrcopy` option, you should also expect to see t # error handling: none ``` -To better understand the CUDA-based transports in UCX, refer to [this wiki](https://github.com/openucx/ucx/wiki/NVIDIA-GPU-Support) for more details. +To better understand the CUDA-based transports in UCX, refer to [this wiki](https://github.com/openucx/ucx/wiki/NVIDIA-GPU-Support) for more details. ## 2. Install ucx-py @@ -184,11 +184,11 @@ make -j install ## 3. Install NCCL -It's important that NCCL 2.4+ be installed and no previous versions of NCCL are conflicting on your library path. This will cause compile errors during the build of cuML. +It's important that NCCL 2.4+ be installed and no previous versions of NCCL are conflicting on your library path. This will cause compile errors during the build of cuML. ```bash -conda install -c nvidia nccl +conda install -c nvidia nccl ``` Create the file `.nccl.conf` in your home dir with the following: @@ -196,7 +196,7 @@ Create the file `.nccl.conf` in your home dir with the following: NCCL_SOCKET_IFNAME=ib0 ``` -## 4. Enable IP over IB interface at ib0 +## 4. Enable IP over IB interface at ib0 Follow the instructions at [this link](https://docs.oracle.com/cd/E19436-01/820-3522-10/ch4-linux.html#50536461_82843) to create an IP interface for the IB devices. @@ -210,20 +210,20 @@ You can verify the interface was created properly with `ifconfig ib0` The output should look like this: ``` -ib0 Link encap:UNSPEC HWaddr 80-00-00-68-FE-80-00-00-00-00-00-00-00-00-00-00 +ib0 Link encap:UNSPEC HWaddr 80-00-00-68-FE-80-00-00-00-00-00-00-00-00-00-00 inet addr:10.0.0.50 Bcast:10.0.0.255 Mask:255.255.255.0 inet6 addr: fe80::526b:4b03:f5:ce9c/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:65520 Metric:1 RX packets:2655 errors:0 dropped:0 overruns:0 frame:0 TX packets:2697 errors:0 dropped:10 overruns:0 carrier:0 - collisions:0 txqueuelen:256 + collisions:0 txqueuelen:256 RX bytes:183152 (183.1 KB) TX bytes:194696 (194.6 KB) ``` ## 5. Set UCX environment vars -Use `ibstatus` to see your open IB devices. Output will look like this: +Use `ibstatus` to see your open IB devices. Output will look like this: ``` Infiniband device 'mlx5_0' port 1 status: @@ -263,7 +263,7 @@ Infiniband device 'mlx5_3' port 1 status: link_layer: InfiniBand ``` - + Put the devices and ports in a `UCX_NET_DEVICES` environment variable: @@ -387,5 +387,3 @@ final_size: 16 final_size: 16 final_size: 16 ``` - -