From a725317c923e2df6308f499eeb5ecbf9d35d9a25 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Thu, 21 Mar 2024 00:40:24 +0000 Subject: [PATCH 1/5] cudaPackages.autoFixElfFiles: elfAddRunpaths shorthand: init python3Packages.torch-bin: fix lazy nvrtc --- .../auto-add-cuda-compat-runpath.sh | 45 +++++++----- .../auto-add-driver-runpath-hook.sh | 11 +-- .../setup-hooks/auto-fix-elf-files.sh | 72 ++++++++++++++++++- .../cuda-modules/setup-hooks/extension.nix | 9 ++- 4 files changed, 111 insertions(+), 26 deletions(-) diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh index fc41024f1551a..780191ea03f48 100644 --- a/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh +++ b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh @@ -1,27 +1,36 @@ # shellcheck shell=bash # Patch all dynamically linked, ELF files with the CUDA driver (libcuda.so) # coming from the cuda_compat package by adding it to the RUNPATH. 
+ +[[ -n ${autoAddCudaCompatRunpath_Once-} ]] && return +declare -g autoAddCudaCompatRunpath_Once=1 + echo "Sourcing auto-add-cuda-compat-runpath-hook" -addCudaCompatRunpath() { - local libPath - local origRpath +arrayInsertBefore() { + local -n arrayRef="$1" # Namerefs, bash >= 4.3: + local pattern="$2" + local item="$3" + shift 3 - if [[ $# -eq 0 ]]; then - echo "addCudaCompatRunpath: no library path provided" >&2 - exit 1 - elif [[ $# -gt 1 ]]; then - echo "addCudaCompatRunpath: too many arguments" >&2 - exit 1 - elif [[ "$1" == "" ]]; then - echo "addCudaCompatRunpath: empty library path" >&2 - exit 1 - else - libPath="$1" - fi + local i + local foundMatch= - origRpath="$(patchelf --print-rpath "$libPath")" - patchelf --set-rpath "@libcudaPath@:$origRpath" "$libPath" + local -a newArray + for i in "${arrayRef[@]}" ; do + if [[ "$i" == "$pattern" ]] ; then + newArray+=( "$item" ) + foundMatch=1 + fi + newArray+=( "$i" ) + done + if [[ -z "$foundMatch" ]] ; then + newArray+=( "$item" ) + fi + arrayRef=( "${newArray[@]}" ) } -postFixupHooks+=("autoFixElfFiles addCudaCompatRunpath") + +if [[ -n "@libcudaPath@" ]] ; then + arrayInsertBefore elfPrependRunpaths "@driverLink@/lib" "@libcudaPath@" +fi diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook.sh b/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook.sh index ecff2a032d64f..75bff8b76bbe6 100644 --- a/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook.sh +++ b/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook.sh @@ -1,8 +1,11 @@ # shellcheck shell=bash -# Run addDriverRunpath on all dynamically linked ELF files -echo "Sourcing auto-add-driver-runpath-hook" +# Equivalent to running addDriverRunpath on all dynamically linked ELF files + +[[ -n ${autoAddDriverRunpath_Once-} ]] && return +declare -g autoAddDriverRunpath_Once=1 + +echo "Sourcing auto-add-driver-runpath-hook.sh" if [ -z "${dontUseAutoAddDriverRunpath-}" ]; then 
- echo "Using autoAddDriverRunpath" - postFixupHooks+=("autoFixElfFiles addDriverRunpath") + elfPrependRunpaths+=( "@driverLink@/lib" ) fi diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh index 1d57dfb17a66d..8bf4e814471bd 100644 --- a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh +++ b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh @@ -2,7 +2,11 @@ # List all dynamically linked ELF files in the outputs and apply a generic fix # action provided as a parameter (currently used to add the CUDA or the # cuda_compat driver to the runpath of binaries) -echo "Sourcing cuda/fix-elf-files.sh" + +[[ -n ${autoFixElfFiles_Once-} ]] && return +declare -g autoFixElfFiles_Once=1 + +echo "Sourcing auto-fix-elf-files.sh" # Returns the exit code of patchelf --print-rpath. # A return code of 0 (success) means the ELF file has a dynamic section, while @@ -62,3 +66,69 @@ autoFixElfFiles() { fi done } + +inputsToArray() { + local inputVar="$1" + local outputVar="$2" + shift 2 + + local -n namerefOut="$outputVar" + + if [ -z "${!inputVar+1}" ] ; then + # Undeclared variable + return + fi + + local type="$(declare -p "$inputVar")" + if [[ "$type" =~ "declare -a" ]]; then + local -n namerefIn="$inputVar" + namerefOut=( "${namerefIn[@]}" ) + else + read -r -a namerefOut <<< "${!inputVar}" + fi +} + +elfBuildRunpathStrings() { + local path + local -a elfAddRunpathsArray elfPrependRunpathsArray + + inputsToArray elfAddRunpaths elfAddRunpathsArray + inputsToArray elfPrependRunpaths elfPrependRunpathsArray + + for path in "${elfPrependRunpathsArray[@]}" ; do + elfAddRunpathsPrefix="$elfAddRunpathsPrefix:$path" + done + elfAddRunpathsPrefix="${elfAddRunpathsPrefix##:}" + + for path in "${elfAddRunpathsArray[@]}" ; do + elfAddRunpathsSuffix="$elfAddRunpathsSuffix:$path" + done + elfAddRunpathsSuffix="${elfAddRunpathsSuffix##:}" +} + +# Expects that elfAddRunpathsPrefix 
and elfAddRunpathsSuffix are set +elfAddRunpathsAction() { + local origPath="$(patchelf --print-rpath "$1")" + local newPath + + newPath="$elfAddRunpathsPrefix" + newPath="${newPath}${newPath:+:}${origPath}" + newPath="${newPath}${elfAddRunpathsSuffix:+:}${elfAddRunpathsSuffix}" + + (( "${NIX_DEBUG:-0}" >= 4 )) && echo patchelf --set-rpath "$newPath" "$1" >&2 + patchelf --set-rpath "$newPath" "$1" +} + +elfAddRunpathsHook() { + [[ -z "${elfAddRunpaths[@]}" ]] && [[ -z "${elfPrependRunpaths[@]}" ]] && return + + echo "Executing elfAddRunpaths: ${elfAddRunpaths[@]}" >&2 + [[ -z "${elfPrependRunpaths[@]}" ]] || echo "elfPrependRunpaths: ${elfPrependRunpaths[@]}" >&2 + + local elfAddRunpathsPrefix + local elfAddRunpathsSuffix + elfBuildRunpathStrings + autoFixElfFiles elfAddRunpathsAction +} + +postFixupHooks+=(elfAddRunpathsHook) diff --git a/pkgs/development/cuda-modules/setup-hooks/extension.nix b/pkgs/development/cuda-modules/setup-hooks/extension.nix index ece70da52b027..e8b6d5eba16e4 100644 --- a/pkgs/development/cuda-modules/setup-hooks/extension.nix +++ b/pkgs/development/cuda-modules/setup-hooks/extension.nix @@ -54,7 +54,8 @@ final: _: { makeSetupHook { name = "auto-add-opengl-runpath-hook"; - propagatedBuildInputs = [addDriverRunpath autoFixElfFiles]; + propagatedBuildInputs = [autoFixElfFiles]; + substitutions = { inherit (addDriverRunpath) driverLink; }; } ./auto-add-driver-runpath-hook.sh ) @@ -71,15 +72,17 @@ final: _: { autoAddCudaCompatRunpath = final.callPackage ( - {makeSetupHook, autoFixElfFiles, cuda_compat ? 
null }: makeSetupHook { name = "auto-add-cuda-compat-runpath-hook"; propagatedBuildInputs = [autoFixElfFiles]; substitutions = { + inherit (addDriverRunpath) driverLink; + # Hotfix Ofborg evaluation - libcudaPath = if final.flags.isJetsonBuild then "${cuda_compat}/compat" else null; + libcudaPath = if final.flags.isJetsonBuild then "${cuda_compat}/compat" else ""; }; meta.broken = !final.flags.isJetsonBuild; From a724abdad8a78198c5958bdb51c09028d361e441 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Thu, 21 Mar 2024 00:47:43 +0000 Subject: [PATCH 2/5] cudaPackages.autoFixElfFiles: decrease default verbosity --- pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh index 8bf4e814471bd..5be8616691771 100644 --- a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh +++ b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh @@ -59,7 +59,7 @@ autoFixElfFiles() { elif elfHasDynamicSection "$f"; then # patchelf returns an error on statically linked ELF files, and in # practice fixing actions all involve patchelf - echo "autoFixElfFiles: using $fixAction to fix $f" >&2 + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "autoFixElfFiles: using $fixAction to fix $f" >&2 $fixAction "$f" elif (( "${NIX_DEBUG:-0}" >= 1 )); then echo "autoFixElfFiles: skipping a statically-linked ELF file $f" From 2906f88afd53be2b34f7c892885839a76580be3e Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Thu, 21 Mar 2024 00:41:16 +0000 Subject: [PATCH 3/5] cudaPackages.saxpy: re-enable eval for Jetsons --- pkgs/development/cuda-modules/saxpy/default.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/pkgs/development/cuda-modules/saxpy/default.nix b/pkgs/development/cuda-modules/saxpy/default.nix index bc299dea006f4..d50565e432a49 100644 --- 
a/pkgs/development/cuda-modules/saxpy/default.nix +++ b/pkgs/development/cuda-modules/saxpy/default.nix @@ -56,6 +56,5 @@ backendStdenv.mkDerivation { license = lib.licenses.mit; maintainers = lib.teams.cuda.members; platforms = lib.platforms.unix; - badPlatforms = lib.optionals flags.isJetsonBuild platforms; }; } From e5dc228f51a10259f237689595a585d9853e8754 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Thu, 21 Mar 2024 00:42:49 +0000 Subject: [PATCH 4/5] python3Packages.torch: cuda: support dlopen(libnvrtc) Fixes https://github.com/NixOS/nixpkgs/issues/296179 --- pkgs/development/python-modules/torch/default.nix | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index 10eecd1de99b7..2f2287d663c3f 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -337,6 +337,8 @@ in buildPythonPackage rec { pybind11 pythonRelaxDepsHook removeReferencesTo + ] ++ lib.optionals stdenv.hostPlatform.isLinux [ + cudaPackages.autoFixElfFiles ] ++ lib.optionals cudaSupport (with cudaPackages; [ autoAddDriverRunpath cuda_nvcc @@ -488,6 +490,10 @@ in buildPythonPackage rec { install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib ''; + elfAddRunpaths = lib.optionals cudaSupport [ + "${lib.getLib cudaPackages.cuda_nvrtc}/lib" + ]; + # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder. 
requiredSystemFeatures = [ "big-parallel" ]; From 026df8f4e5ebba8619c9fb9643702144b05fd20b Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Mar 2024 17:54:00 +0000 Subject: [PATCH 5/5] python3Packages.torch-bin: fix lazy nvrtc --- pkgs/development/python-modules/torch/bin.nix | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkgs/development/python-modules/torch/bin.nix b/pkgs/development/python-modules/torch/bin.nix index bee32b6163453..944f958a5eb20 100644 --- a/pkgs/development/python-modules/torch/bin.nix +++ b/pkgs/development/python-modules/torch/bin.nix @@ -88,9 +88,11 @@ in buildPythonPackage { rm -rf $out/bin ''; - postFixup = lib.optionalString stdenv.isLinux '' - addAutoPatchelfSearchPath "$out/${python.sitePackages}/torch/lib" - ''; + elfAddRunpaths = [ + "${lib.getLib cudaPackages.cuda_nvrtc}/lib" + "$ORIGIN" + ]; + # The wheel-binary is not stripped to avoid the error of `ImportError: libtorch_cuda_cpp.so: ELF load command address/offset not properly aligned.`. dontStrip = true;