Merge branch 'master' into macports

microsoft · Jul 12, 2024 · 6f9eae1 · 6f9eae1
2 parents c5b0476 + 2bc3ab8
commit 6f9eae1
Show file tree

Hide file tree

Showing 23 changed files with 269 additions and 405 deletions.
diff --git a/.ci/test.sh b/.ci/test.sh
@@ -191,6 +191,8 @@ elif [[ $TASK == "bdist" ]]; then
             PLATFORM="manylinux2014_$ARCH"
         fi
         sh ./build-python.sh bdist_wheel --integrated-opencl || exit 1
+        # rename wheel, to fix scikit-build-core choosing the platform 'linux_aarch64' instead of
+        # a manylinux tag
         mv \
             ./dist/*.whl \
             ./dist/tmp.whl || exit 1

diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh
@@ -104,6 +104,18 @@ if [[ $OS_NAME == "macos" ]]; then
     sudo installer \
         -pkg $(pwd)/R.pkg \
         -target / || exit 1
+
+    # install tidy v5.8.0 (abort the job if the download or the install fails)
+    # ref: https://groups.google.com/g/r-sig-mac/c/7u_ivEj4zhM
+    TIDY_URL=https://github.com/htacg/tidy-html5/releases/download/5.8.0/tidy-5.8.0-macos-x86_64+arm64.pkg
+    curl -sL ${TIDY_URL} -o tidy.pkg || exit 1
+    sudo installer \
+        -pkg $(pwd)/tidy.pkg \
+        -target / || exit 1
+
+    # ensure that this newer version of 'tidy' is used by 'R CMD check'
+    # ref: https://cran.r-project.org/doc/manuals/R-exts.html#Checking-packages
+    export R_TIDYCMD=/usr/local/bin/tidy
 fi
 
 # fix for issue where CRAN was not returning {lattice} and {evaluate} when using R 3.6
@@ -263,20 +275,25 @@ fi
 
 # this check makes sure that CI builds of the package
 # actually use MM_PREFETCH preprocessor definition
-if [[ $R_BUILD_TYPE == "cran" ]]; then
-    mm_prefetch_working=$(
-        cat $BUILD_LOG_FILE \
-        | grep --count -E "checking whether MM_PREFETCH work.*yes"
-    )
-else
-    mm_prefetch_working=$(
-        cat $BUILD_LOG_FILE \
-        | grep --count -E ".*Performing Test MM_PREFETCH - Success"
-    )
-fi
-if [[ $mm_prefetch_working -ne 1 ]]; then
-    echo "MM_PREFETCH test was not passed"
-    exit 1
+#
+# _mm_prefetch will not work on arm64 architecture
+# ref: https://github.com/microsoft/LightGBM/issues/4124
+if [[ $ARCH != "arm64" ]]; then
+    if [[ $R_BUILD_TYPE == "cran" ]]; then
+        mm_prefetch_working=$(
+            cat $BUILD_LOG_FILE \
+            | grep --count -E "checking whether MM_PREFETCH work.*yes"
+        )
+    else
+        mm_prefetch_working=$(
+            cat $BUILD_LOG_FILE \
+            | grep --count -E ".*Performing Test MM_PREFETCH - Success"
+        )
+    fi
+    if [[ $mm_prefetch_working -ne 1 ]]; then
+        echo "MM_PREFETCH test was not passed"
+        exit 1
+    fi
 fi
 
 # this check makes sure that CI builds of the package

diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml
@@ -55,12 +55,6 @@ jobs:
             r_version: 4.3
             build_type: cmake
             container: 'ubuntu:22.04'
-          - os: ubuntu-latest
-            task: r-package
-            compiler: clang
-            r_version: 3.6
-            build_type: cmake
-            container: 'ubuntu:18.04'
           - os: ubuntu-latest
             task: r-package
             compiler: clang
@@ -138,6 +132,13 @@ jobs:
             r_version: 4.3
             build_type: cran
             container: null
+          # macos-14 = arm64
+          - os: macos-14
+            task: r-package
+            compiler: clang
+            r_version: 4.3
+            build_type: cran
+            container: null
     steps:
       - name: Prevent conversion of line endings on Windows
         if: startsWith(matrix.os, 'windows')
@@ -188,12 +189,12 @@ jobs:
           CTAN_MIRROR: https://ctan.math.illinois.edu/systems/win32/miktex
           TINYTEX_INSTALLER: TinyTeX
       - name: Setup and run tests on Linux and macOS
-        if: matrix.os == 'macos-13' || matrix.os == 'ubuntu-latest'
+        if: startsWith(matrix.os, 'macos') || startsWith(matrix.os, 'ubuntu')
         shell: bash
         run: |
           export TASK="${{ matrix.task }}"
           export COMPILER="${{ matrix.compiler }}"
-          if [[ "${{ matrix.os }}" == "macos-13" ]]; then
+          if [[ "${{ matrix.os }}" =~ ^macos ]]; then
               export OS_NAME="macos"
           elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
               export OS_NAME="linux"

diff --git a/.vsts-ci.yml b/.vsts-ci.yml
@@ -182,71 +182,71 @@ jobs:
     inputs:
       filePath: $(Build.SourcesDirectory)/.ci/test.sh
       targetType: 'filePath'
-# ###########################################
-# - job: QEMU_multiarch
-# ###########################################
-#   variables:
-#     BUILD_DIRECTORY: /LightGBM
-#     COMPILER: gcc
-#     PRODUCES_ARTIFACTS: 'true'
-#   pool:
-#     vmImage: ubuntu-22.04
-#   timeoutInMinutes: 180
-#   strategy:
-#     matrix:
-#       bdist:
-#         TASK: bdist
-#         ARCH: aarch64
-#   steps:
-#   - script: |
-#       sudo apt-get update
-#       sudo apt-get install --no-install-recommends -y \
-#         binfmt-support \
-#         qemu \
-#         qemu-user \
-#         qemu-user-static
-#     displayName: 'Install QEMU'
-#   - script: |
-#       docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
-#     displayName: 'Enable Docker multi-architecture support'
-#   - script: |
-#       git clean -d -f -x
-#     displayName: 'Clean source directory'
-#   - script: |
-#       cat > docker-script.sh <<EOF
-#       export CONDA=\$HOME/miniforge
-#       export PATH=\$CONDA/bin:/opt/rh/llvm-toolset-7.0/root/usr/bin:\$PATH
-#       export LD_LIBRARY_PATH=/opt/rh/llvm-toolset-7.0/root/usr/lib64:\$LD_LIBRARY_PATH
-#       \$BUILD_DIRECTORY/.ci/setup.sh || exit 1
-#       \$BUILD_DIRECTORY/.ci/test.sh || exit 1
-#       EOF
-#       IMAGE_URI="lightgbm/vsts-agent:manylinux2014_aarch64"
-#       docker pull "${IMAGE_URI}" || exit 1
-#       PLATFORM=$(docker inspect --format='{{.Os}}/{{.Architecture}}' "${IMAGE_URI}") || exit 1
-#       echo "detected image platform: ${PLATFORM}"
-#       docker run \
-#         --platform "${PLATFORM}" \
-#         --rm \
-#         --env AZURE=true \
-#         --env BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY \
-#         --env BUILD_DIRECTORY=$BUILD_DIRECTORY \
-#         --env COMPILER=$COMPILER \
-#         --env METHOD=$METHOD \
-#         --env OS_NAME=linux \
-#         --env PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS \
-#         --env PYTHON_VERSION=$PYTHON_VERSION \
-#         --env TASK=$TASK \
-#         -v "$(Build.SourcesDirectory)":"$BUILD_DIRECTORY" \
-#         -v "$(Build.ArtifactStagingDirectory)":"$(Build.ArtifactStagingDirectory)" \
-#         "${IMAGE_URI}" \
-#         /bin/bash $BUILD_DIRECTORY/docker-script.sh
-#     displayName: 'Setup and run tests'
-#   - task: PublishBuildArtifacts@1
-#     condition: and(succeeded(), in(variables['TASK'], 'bdist'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
-#     inputs:
-#       pathtoPublish: '$(Build.ArtifactStagingDirectory)'
-#       artifactName: PackageAssets
-#       artifactType: container
+###########################################
+- job: QEMU_multiarch
+###########################################
+  variables:
+    BUILD_DIRECTORY: /LightGBM
+    COMPILER: gcc
+    PRODUCES_ARTIFACTS: 'true'
+  pool:
+    vmImage: ubuntu-22.04
+  timeoutInMinutes: 180
+  strategy:
+    matrix:
+      bdist:
+        TASK: bdist
+        ARCH: aarch64
+  steps:
+  - script: |
+      sudo apt-get update
+      sudo apt-get install --no-install-recommends -y \
+        binfmt-support \
+        qemu \
+        qemu-user \
+        qemu-user-static
+    displayName: 'Install QEMU'
+  - script: |
+      docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
+    displayName: 'Enable Docker multi-architecture support'
+  - script: |
+      git clean -d -f -x
+    displayName: 'Clean source directory'
+  - script: |
+      cat > docker-script.sh <<EOF
+      export CONDA=\$HOME/miniforge
+      export PATH=\$CONDA/bin:/opt/rh/llvm-toolset-7.0/root/usr/bin:\$PATH
+      export LD_LIBRARY_PATH=/opt/rh/llvm-toolset-7.0/root/usr/lib64:\$LD_LIBRARY_PATH
+      \$BUILD_DIRECTORY/.ci/setup.sh || exit 1
+      \$BUILD_DIRECTORY/.ci/test.sh || exit 1
+      EOF
+      IMAGE_URI="lightgbm/vsts-agent:manylinux2014_aarch64"
+      docker pull "${IMAGE_URI}" || exit 1
+      PLATFORM=$(docker inspect --format='{{.Os}}/{{.Architecture}}' "${IMAGE_URI}") || exit 1
+      echo "detected image platform: ${PLATFORM}"
+      docker run \
+        --platform "${PLATFORM}" \
+        --rm \
+        --env AZURE=true \
+        --env BUILD_ARTIFACTSTAGINGDIRECTORY=$BUILD_ARTIFACTSTAGINGDIRECTORY \
+        --env BUILD_DIRECTORY=$BUILD_DIRECTORY \
+        --env COMPILER=$COMPILER \
+        --env METHOD=$METHOD \
+        --env OS_NAME=linux \
+        --env PRODUCES_ARTIFACTS=$PRODUCES_ARTIFACTS \
+        --env PYTHON_VERSION=$PYTHON_VERSION \
+        --env TASK=$TASK \
+        -v "$(Build.SourcesDirectory)":"$BUILD_DIRECTORY" \
+        -v "$(Build.ArtifactStagingDirectory)":"$(Build.ArtifactStagingDirectory)" \
+        "${IMAGE_URI}" \
+        /bin/bash $BUILD_DIRECTORY/docker-script.sh
+    displayName: 'Setup and run tests'
+  - task: PublishBuildArtifacts@1
+    condition: and(succeeded(), in(variables['TASK'], 'bdist'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')))
+    inputs:
+      pathtoPublish: '$(Build.ArtifactStagingDirectory)'
+      artifactName: PackageAssets
+      artifactType: container
 ###########################################
 - job: macOS
 ###########################################
@@ -376,7 +376,7 @@ jobs:
   dependsOn:
   - Linux
   - Linux_latest
-  # - QEMU_multiarch
+  - QEMU_multiarch
   - macOS
   - Windows
   - R_artifact

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -2,7 +2,6 @@ option(USE_MPI "Enable MPI-based distributed learning" OFF)
 option(USE_OPENMP "Enable OpenMP" ON)
 option(USE_GPU "Enable GPU-accelerated training" OFF)
 option(USE_SWIG "Enable SWIG to generate Java API" OFF)
-option(USE_HDFS "Enable HDFS support (EXPERIMENTAL)" OFF)
 option(USE_TIMETAG "Set to ON to output time costs" OFF)
 option(USE_CUDA "Enable CUDA-accelerated training " OFF)
 option(USE_DEBUG "Set to ON for Debug mode" OFF)
@@ -297,21 +296,6 @@ if(USE_CUDA)
     endforeach()
 endif()
 
-if(USE_HDFS)
-    message(
-      DEPRECATION
-      "HDFS support in LightGBM is deprecated, and will be removed in a future release.\
-      See https://github.com/microsoft/LightGBM/issues/6436.
-      "
-    )
-    find_package(JNI REQUIRED)
-    find_path(HDFS_INCLUDE_DIR hdfs.h REQUIRED)
-    find_library(HDFS_LIB NAMES hdfs REQUIRED)
-    include_directories(${HDFS_INCLUDE_DIR})
-    add_definitions(-DUSE_HDFS)
-    set(HDFS_CXX_LIBRARIES ${HDFS_LIB} ${JAVA_JVM_LIBRARY})
-endif()
-
 include(CheckCXXSourceCompiles)
 check_cxx_source_compiles("
 #include <xmmintrin.h>
@@ -650,10 +634,6 @@ if(USE_CUDA)
   target_link_libraries(_lightgbm PRIVATE ${histograms})
 endif()
 
-if(USE_HDFS)
-  target_link_libraries(lightgbm_objs PUBLIC ${HDFS_CXX_LIBRARIES})
-endif()
-
 if(WIN32)
     if(MINGW OR CYGWIN)
       target_link_libraries(lightgbm_objs PUBLIC ws2_32 iphlpapi)

diff --git a/build-python.sh b/build-python.sh
@@ -40,8 +40,6 @@
 #                                   Compile CUDA version.
 #     --gpu
 #                                   Compile GPU version.
-#     --hdfs
-#                                   Compile HDFS version.
 #     --integrated-opencl
 #                                   Compile integrated OpenCL version.
 #     --mingw
@@ -148,9 +146,6 @@ while [ $# -gt 0 ]; do
     --gpu)
         BUILD_ARGS="${BUILD_ARGS} --config-setting=cmake.define.USE_GPU=ON"
         ;;
-    --hdfs)
-        BUILD_ARGS="${BUILD_ARGS} --config-setting=cmake.define.USE_HDFS=ON"
-        ;;
     --integrated-opencl)
         BUILD_ARGS="${BUILD_ARGS} --config-setting=cmake.define.__INTEGRATE_OPENCL=ON"
         ;;
@@ -364,13 +359,12 @@ fi
 if test "${INSTALL}" = true; then
     echo "--- installing lightgbm ---"
     cd ../dist
-    # remove existing installation
-    # (useful when building the dev version multiple times, where the version number doesn't change)
-    pip uninstall --yes lightgbm
     # ref for use of '--find-links': https://stackoverflow.com/a/52481267/3986677
     pip install \
         ${PIP_INSTALL_ARGS} \
+        --ignore-installed \
         --no-cache-dir \
+        --no-deps \
         --find-links=. \
         lightgbm
     cd ../

diff --git a/docs/Installation-Guide.rst b/docs/Installation-Guide.rst
@@ -628,41 +628,6 @@ Windows
 The CUDA version is not supported on Windows.
 Use the GPU version (``device_type=gpu``) for GPU acceleration on Windows.
 
-Build HDFS Version
-~~~~~~~~~~~~~~~~~~
-
-.. warning::
-   HDFS support in LightGBM is deprecated, and will be removed in a future release.
-   See https://github.com/microsoft/LightGBM/issues/6436.
-
-The HDFS version of LightGBM was tested on CDH-5.14.4 cluster.
-
-Linux
-^^^^^
-
-On Linux a HDFS version of LightGBM can be built using **CMake** and **gcc**.
-
-1. Install `CMake`_.
-
-2. Run the following commands:
-
-   .. code:: sh
-
-     git clone --recursive https://github.com/microsoft/LightGBM
-     cd LightGBM
-     cmake -B build -S . -DUSE_HDFS=ON
-     # if you have installed HDFS to a customized location, you should specify paths to HDFS headers (hdfs.h) and library (libhdfs.so) like the following:
-     # cmake \
-     #   -DUSE_HDFS=ON \
-     #   -DHDFS_LIB="/opt/cloudera/parcels/CDH-5.14.4-1.cdh5.14.4.p0.3/lib64/libhdfs.so" \
-     #   -DHDFS_INCLUDE_DIR="/opt/cloudera/parcels/CDH-5.14.4-1.cdh5.14.4.p0.3/include/" \
-     #   ..
-     cmake --build build -j4
-
-**Note**: glibc >= 2.14 is required.
-
-**Note**: In some rare cases you may need to install OpenMP runtime library separately (use your package manager and search for ``lib[g|i]omp`` for doing this).
-
 Build Java Wrapper
 ~~~~~~~~~~~~~~~~~~
 

diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js
@@ -29,7 +29,6 @@ $(function() {
             '#build-mpi-version',
             '#build-gpu-version',
             '#build-cuda-version',
-            '#build-hdfs-version',
             '#build-java-wrapper',
             '#build-c-unit-tests'
         ];

diff --git a/include/LightGBM/arrow.h b/include/LightGBM/arrow.h
@@ -122,7 +122,7 @@ class ArrowChunkedArray {
                            const struct ArrowSchema* schema)
       : releases_arrow_(true) {
     chunks_.reserve(n_chunks);
-    for (auto k = 0; k < n_chunks; ++k) {
+    for (int64_t k = 0; k < n_chunks; ++k) {
       if (chunks[k].length == 0) continue;
       chunks_.push_back(&chunks[k]);
     }

diff --git a/include/LightGBM/boosting.h b/include/LightGBM/boosting.h
@@ -74,7 +74,7 @@ class LIGHTGBM_EXPORT Boosting {
   /*!
   * \brief Update the tree output by new training data
   */
-  virtual void RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction) = 0;
+  virtual void RefitTree(const int* tree_leaf_prediction, const size_t nrow, const size_t ncol) = 0;
 
   /*!
   * \brief Training logic