From 275692006df1a16ee7a79897c52b13e85e189f75 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Fri, 12 Jan 2024 12:54:05 -0800 Subject: [PATCH] Add GitHub Actions action that builds LBANN (#2418) --- .github/workflows/build-cpu.yml | 74 +++++++++++++++++++ .gitignore | 3 + scripts/build_lbann.sh | 6 +- scripts/customize_build_env.sh | 1 + scripts/utilities.sh | 3 +- .../readers/data_reader_mnist.cpp | 32 ++++++-- src/execution_algorithms/kfac.cpp | 3 + src/utils/amp.cpp | 1 + src/utils/file_utils.cpp | 6 +- 9 files changed, 114 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/build-cpu.yml diff --git a/.github/workflows/build-cpu.yml b/.github/workflows/build-cpu.yml new file mode 100644 index 00000000000..56b631709c8 --- /dev/null +++ b/.github/workflows/build-cpu.yml @@ -0,0 +1,74 @@ +name: LBANN CPU + +on: + push: + branches: develop + pull_request: + branches: develop + merge_group: + branches: develop + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + compiler: [gcc] # , clang + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'pip' + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y libyaml-dev cmake lmod ninja-build + sudo apt-get install -y libblas-dev libopenblas-dev liblapacke-dev + sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev + python -m pip install --upgrade pip + + - name: Restore cached Spack-built dependencies + id: cache-spack + uses: actions/cache/restore@v3 + with: + path: | + ~/.spack + spack + key: ${{ runner.os }}-${{ matrix.compiler }}-spackdeps + + - name: Build and install LBANN dependencies + if: steps.cache-spack.outputs.cache-hit != 'true' + run: | + source /usr/share/lmod/lmod/init/bash + git clone -c feature.manyFiles=true https://github.com/spack/spack.git + cd spack + git checkout 73858df14dc3f0e701814c84bb8bd6b72f80a806 # Use a tried and true version of Spack + cd .. + source spack/share/spack/setup-env.sh + scripts/build_lbann.sh -d --dependencies-only -l ci -- +numpy +unit_tests %${{ matrix.compiler }} + + - name: Cache Spack-built dependencies + id: cache-spack-save + uses: actions/cache/save@v3 + with: + path: | + ~/.spack + spack + key: ${{ runner.os }}-${{ matrix.compiler }}-spackdeps + + - name: Build LBANN + run: | + source /usr/share/lmod/lmod/init/bash + source spack/share/spack/setup-env.sh + scripts/build_lbann.sh -r -l ci --ci -- +numpy +unit_tests %${{ matrix.compiler }} + + - name: Test Catch2 + run: | + cd builds/*/build + ./unit_test/helpers_tests + ./unit_test/seq-catch-tests diff --git a/.gitignore b/.gitignore index 2a22e338cce..405a1f36ce1 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,6 @@ spack_environments/users/ # we don't want to collect slurm output **/slurm-*.out + +# Ignore default lbann output experiment directory names +????????_??????_lbann*/ diff --git a/scripts/build_lbann.sh b/scripts/build_lbann.sh index 876af637aeb..df9d091a356 100755 --- a/scripts/build_lbann.sh +++ b/scripts/build_lbann.sh @@ -40,6 +40,7 @@ fi SPACK_INSTALL_DEPENDENCIES_ONLY= # List of packages to install at the root level with LBANN SPACK_EXTRA_ROOT_PACKAGES= +EXTRA_NINJA_FLAGS= CONFIG_FILE_NAME= @@ -120,7 +121,7 @@ while :; do --ci) # We want all compilation errors LBANN_WARNINGS_AS_ERRORS="TRUE" - EXTRA_NINJA_FLAGS="-k 0" + EXTRA_NINJA_FLAGS="${EXTRA_NINJA_FLAGS} -k 0" ;; --ci-pip) PIP_EXTRAS="${PIP_EXTRAS} ${LBANN_HOME}/ci_test/requirements.txt" @@ -174,6 +175,7 @@ while :; do -j|--build-jobs) if [ -n "${2}" ]; then BUILD_JOBS="-j${2}" + EXTRA_NINJA_FLAGS="${EXTRA_NINJA_FLAGS} -j${2}" shift else echo "\"${1}\" option requires a non-empty option argument" >&2 @@ -1119,7 +1121,7 @@ EOF echo "I have found and will use ${MATCHED_CONFIG_FILE}" CONFIG_FILE_NAME=${MATCHED_CONFIG_FILE} if [[ ! -e "${LBANN_BUILD_PARENT_DIR}/${CONFIG_FILE_NAME}" ]]; then - echo "Overwritting exising CMake config file in ${LBANN_BUILD_PARENT_DIR}/${CONFIG_FILE_NAME}" + echo "Overwriting exising CMake config file in ${LBANN_BUILD_PARENT_DIR}/${CONFIG_FILE_NAME}" fi # Save the config file in the build directory CMD="mv ${MATCHED_CONFIG_FILE_PATH} ${LBANN_BUILD_PARENT_DIR}/${CONFIG_FILE_NAME}" diff --git a/scripts/customize_build_env.sh b/scripts/customize_build_env.sh index d3ab8fba06e..cc958770f3a 100644 --- a/scripts/customize_build_env.sh +++ b/scripts/customize_build_env.sh @@ -286,6 +286,7 @@ set_center_specific_spack_dependencies() esac else echo "No center found and no center-specified CENTER_DEPENDENCIES for ${spack_arch_target} at ${center}." + CENTER_COMPILER="%gcc" # Relatively safe default choice fi } diff --git a/scripts/utilities.sh b/scripts/utilities.sh index f115200282a..97445aeb541 100644 --- a/scripts/utilities.sh +++ b/scripts/utilities.sh @@ -35,7 +35,8 @@ function find_cmake_config_file() { HOST=$(hostname) HOST=${HOST//[[:digit:]]/} - HOST=${HOST//\-/} + HOST=$(echo $HOST | sed 's/\(.*\)-$/\1/') + [[ -z "${SYS_TYPE}" ]] && SYS=${SPACK_ARCH} || SYS="${SYS_TYPE}" if [[ "${center_compiler}" =~ .*"%".*"@".* ]]; then diff --git a/src/data_ingestion/readers/data_reader_mnist.cpp b/src/data_ingestion/readers/data_reader_mnist.cpp index 5dacaef663a..1c38a595ada 100644 --- a/src/data_ingestion/readers/data_reader_mnist.cpp +++ b/src/data_ingestion/readers/data_reader_mnist.cpp @@ -108,8 +108,12 @@ void load_mnist_data(const std::string imagepath, } int magicnum1, numitems1; - fread(&magicnum1, 4, 1, fplbl); - fread(&numitems1, 4, 1, fplbl); + if (!fread(&magicnum1, 4, 1, fplbl)) { + LBANN_ERROR("Invalid MNIST file format (1)"); + } + if (!fread(&numitems1, 4, 1, fplbl)) { + LBANN_ERROR("Invalid MNIST file format (2)"); + } __swapEndianInt((unsigned int&)magicnum1); __swapEndianInt((unsigned int&)numitems1); @@ -122,10 +126,18 @@ void load_mnist_data(const std::string imagepath, } int magicnum2, numitems2, imgwidth, imgheight; - fread(&magicnum2, 4, 1, fpimg); - fread(&numitems2, 4, 1, fpimg); - fread(&imgwidth, 4, 1, fpimg); - fread(&imgheight, 4, 1, fpimg); + if (!fread(&magicnum2, 4, 1, fpimg)) { + LBANN_ERROR("Invalid MNIST file format (3)"); + } + if (!fread(&numitems2, 4, 1, fpimg)) { + LBANN_ERROR("Invalid MNIST file format (4)"); + } + if (!fread(&imgwidth, 4, 1, fpimg)) { + LBANN_ERROR("Invalid MNIST file format (5)"); + } + if (!fread(&imgheight, 4, 1, fpimg)) { + LBANN_ERROR("Invalid MNIST file format (6)"); + } __swapEndianInt((unsigned int&)magicnum2); __swapEndianInt((unsigned int&)numitems2); __swapEndianInt((unsigned int&)imgwidth); @@ -147,8 +159,12 @@ void load_mnist_data(const std::string imagepath, m_image_data.resize(numitems1); for (int n = 0; n < numitems1; n++) { m_image_data[n].resize(1 + (imgwidth * imgheight)); - fread(&m_image_data[n][0], 1, 1, fplbl); - fread(&m_image_data[n][1], imgwidth * imgheight, 1, fpimg); + if (!fread(&m_image_data[n][0], 1, 1, fplbl)) { + LBANN_ERROR("Invalid MNIST file format (7)"); + } + if (!fread(&m_image_data[n][1], imgwidth * imgheight, 1, fpimg)) { + LBANN_ERROR("Invalid MNIST file format (8)"); + } } fclose(fpimg); fclose(fplbl); diff --git a/src/execution_algorithms/kfac.cpp b/src/execution_algorithms/kfac.cpp index b067d2ac7be..9e04b8c5fbc 100644 --- a/src/execution_algorithms/kfac.cpp +++ b/src/execution_algorithms/kfac.cpp @@ -264,7 +264,10 @@ bool KFAC::train_mini_batch(ExeContextType& kfac_context, data_coordinator& dc) { LBANN_CALIPER_MARK_FUNCTION; +#ifdef LBANN_HAS_GPU bool profile = true; +#endif + auto& sgd_context = kfac_context.get_sgd_execution_context(); auto current_epoch = sgd_context.get_epoch(); diff --git a/src/utils/amp.cpp b/src/utils/amp.cpp index 78845f04b27..123e3dc114f 100644 --- a/src/utils/amp.cpp +++ b/src/utils/amp.cpp @@ -35,6 +35,7 @@ namespace { bool isfinite(__half x) { return std::isfinite((float)x); } } // namespace #endif +using std::isfinite; namespace lbann { namespace amp { diff --git a/src/utils/file_utils.cpp b/src/utils/file_utils.cpp index 792879957cb..b291268850d 100644 --- a/src/utils/file_utils.cpp +++ b/src/utils/file_utils.cpp @@ -214,17 +214,15 @@ namespace file { std::string extract_parent_directory(const std::string& path) { - std::vector buffer(path.size() + 1); + std::vector buffer(path.size() + 1, '\0'); path.copy(buffer.data(), path.size()); - buffer.back() = '\0'; return ::dirname(buffer.data()); } std::string extract_base_name(const std::string& path) { - std::vector buffer(path.size() + 1); + std::vector buffer(path.size() + 1, '\0'); path.copy(buffer.data(), path.size()); - buffer.back() = '\0'; return ::basename(buffer.data()); }