Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,18 @@ fi

# Install NVIDIA drivers in host_injections (if they exist)
# The mere presence of the 'nvidia-smi' command is not sufficient: the host
# libraries are only linked when 'nvidia-smi --version' runs successfully.
if command_exists "nvidia-smi"; then
    # Capture the exit code via '||' so a failing 'nvidia-smi' cannot abort
    # the script, regardless of whether 'set -e' is active at this point.
    ec=0
    nvidia-smi --version || ec=$?
    if [ "${ec}" -eq 0 ]; then
        echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
        "${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh"
    else
        echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run succesfully."
        echo "This script now assumes this is NOT a GPU node."
        echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error."
    fi
fi


if [ ! -z "${shared_fs_path}" ]; then
shared_eb_sourcepath=${shared_fs_path}/easybuild/sources
echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path"
Expand Down
18 changes: 16 additions & 2 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,28 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR}
# prepare arguments to eessi_container.sh specific to build step
BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
BUILD_STEP_ARGS+=("--storage" "${STORAGE}")

# add options required to handle NVIDIA support
# Exactly one '--nvidia <mode>' pair is appended:
#   'all'     when 'nvidia-smi' exists and 'nvidia-smi --version' succeeds
#   'install' otherwise (no 'nvidia-smi', or it does not run successfully)
if command_exists "nvidia-smi"; then
    # Accept that this may fail
    set +e
    nvidia-smi --version
    ec=$?
    set -e
    if [ "${ec}" -eq 0 ]; then
        echo "Command 'nvidia-smi' found, using available GPU"
        BUILD_STEP_ARGS+=("--nvidia" "all")
    else
        echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run succesfully."
        echo "This script now assumes this is NOT a GPU node."
        echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error."
        BUILD_STEP_ARGS+=("--nvidia" "install")
    fi
else
    echo "No 'nvidia-smi' found, no available GPU but allowing overriding this check"
    BUILD_STEP_ARGS+=("--nvidia" "install")
fi

# Retain location for host injections so we don't reinstall CUDA
# (Always need to run the driver installation as available driver may change)
if [[ ! -z ${SHARED_FS_PATH} ]]; then
Expand Down
15 changes: 13 additions & 2 deletions bot/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,19 @@ TEST_STEP_ARGS+=("--extra-bind-paths" "/sys/fs/cgroup:/hostsys/fs/cgroup:ro")

# add options required to handle NVIDIA support
# '--nvidia run' is appended only when 'nvidia-smi' exists AND
# 'nvidia-smi --version' succeeds; otherwise no NVIDIA option is added.
if command_exists "nvidia-smi"; then
    # Accept that this may fail
    set +e
    nvidia-smi --version
    ec=$?
    set -e
    if [ "${ec}" -eq 0 ]; then
        echo "Command 'nvidia-smi' found, using available GPU"
        TEST_STEP_ARGS+=("--nvidia" "run")
    else
        echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run succesfully."
        echo "This script now assumes this is NOT a GPU node."
        echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error."
    fi
fi

# prepare arguments to test_suite.sh (specific to test step)
Expand Down