Only enable NVIDIA GPU handling when 'nvidia-smi --version' actually succeeds.

Touches EESSI-install-software.sh, bot/build.sh and bot/test.sh. In each script
the existing 'command_exists "nvidia-smi"' branch now runs 'nvidia-smi --version'
first; on a non-zero exit status a warning is printed and the node is treated as
a non-GPU node (bot/build.sh then passes "--nvidia" "install", exactly as it
already does when 'nvidia-smi' is missing).

The exit status is captured with 'ec=0; cmd || ec=$?' in all three scripts, so
a failing probe is tolerated whether or not 'errexit' (set -e) is active, and
the shell's option state is left untouched.

NOTE(review): line breaks and indentation were reconstructed (4-space indent
assumed) -- verify the context lines against the target files before applying.
NOTE(review): the post-image blob ids on the 'index' lines were not recomputed.

diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh
index 3a9ba175c9..25057216a3 100755
--- a/EESSI-install-software.sh
+++ b/EESSI-install-software.sh
@@ -271,11 +271,20 @@ fi
 
 # Install NVIDIA drivers in host_injections (if they exist)
 if command_exists "nvidia-smi"; then
-    echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
-    ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
+    # Probe the driver: the 'nvidia-smi' binary can be present while the probe still fails.
+    # Capturing the status via '||' tolerates that failure even if 'errexit' is active.
+    ec=0
+    nvidia-smi --version || ec=$?
+    if [ "${ec}" -eq 0 ]; then
+        echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
+        ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
+    else
+        echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run successfully."
+        echo "This script now assumes this is NOT a GPU node."
+        echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error."
+    fi
 fi
 
-
 if [ ! -z "${shared_fs_path}" ]; then
     shared_eb_sourcepath=${shared_fs_path}/easybuild/sources
     echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path"
diff --git a/bot/build.sh b/bot/build.sh
index 29444a32c2..d904a020e2 100755
--- a/bot/build.sh
+++ b/bot/build.sh
@@ -243,14 +243,27 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR}
 # prepare arguments to eessi_container.sh specific to build step
 BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
 BUILD_STEP_ARGS+=("--storage" "${STORAGE}")
+
 # add options required to handle NVIDIA support
 if command_exists "nvidia-smi"; then
-    echo "Command 'nvidia-smi' found, using available GPU"
-    BUILD_STEP_ARGS+=("--nvidia" "all")
+    # Accept that this may fail: capture the status via '||' so the failure is
+    # tolerated without toggling the shell's 'errexit' setting
+    ec=0
+    nvidia-smi --version || ec=$?
+    if [ "${ec}" -eq 0 ]; then
+        echo "Command 'nvidia-smi' found, using available GPU"
+        BUILD_STEP_ARGS+=("--nvidia" "all")
+    else
+        echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run successfully."
+        echo "This script now assumes this is NOT a GPU node."
+        echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error."
+        BUILD_STEP_ARGS+=("--nvidia" "install")
+    fi
 else
     echo "No 'nvidia-smi' found, no available GPU but allowing overriding this check"
     BUILD_STEP_ARGS+=("--nvidia" "install")
 fi
+
 # Retain location for host injections so we don't reinstall CUDA
 # (Always need to run the driver installation as available driver may change)
 if [[ ! -z ${SHARED_FS_PATH} ]]; then
diff --git a/bot/test.sh b/bot/test.sh
index 464c4817a9..2b1d98c488 100755
--- a/bot/test.sh
+++ b/bot/test.sh
@@ -214,8 +214,18 @@ TEST_STEP_ARGS+=("--extra-bind-paths" "/sys/fs/cgroup:/hostsys/fs/cgroup:ro")
 
 # add options required to handle NVIDIA support
 if command_exists "nvidia-smi"; then
-    echo "Command 'nvidia-smi' found, using available GPU"
-    TEST_STEP_ARGS+=("--nvidia" "run")
+    # Accept that this may fail: capture the status via '||' so the failure is
+    # tolerated without toggling the shell's 'errexit' setting
+    ec=0
+    nvidia-smi --version || ec=$?
+    if [ "${ec}" -eq 0 ]; then
+        echo "Command 'nvidia-smi' found, using available GPU"
+        TEST_STEP_ARGS+=("--nvidia" "run")
+    else
+        echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run successfully."
+        echo "This script now assumes this is NOT a GPU node."
+        echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error."
+    fi
 fi
 
 # prepare arguments to test_suite.sh (specific to test step)