Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pkgs/development/cuda-modules/saxpy/default.nix
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

omg, I recovered the string-splitting in the last force-push, but I broke cuda_compat again because I do the string-splitting too late 🙈

Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,5 @@ backendStdenv.mkDerivation {
license = lib.licenses.mit;
maintainers = lib.teams.cuda.members;
platforms = lib.platforms.unix;
badPlatforms = lib.optionals flags.isJetsonBuild platforms;
};
}
Original file line number Diff line number Diff line change
@@ -1,27 +1,36 @@
# shellcheck shell=bash
# Patch all dynamically linked, ELF files with the CUDA driver (libcuda.so)
# coming from the cuda_compat package by adding it to the RUNPATH.

[[ -n ${autoAddCudaCompatRunpath_Once-} ]] && return
declare -g autoAddCudaCompatRunpath_Once=1

echo "Sourcing auto-add-cuda-compat-runpath-hook"

addCudaCompatRunpath() {
local libPath
local origRpath
arrayInsertBefore() {
local -n arrayRef="$1" # Namerefs, bash >= 4.3:
local pattern="$2"
local item="$3"
shift 3

if [[ $# -eq 0 ]]; then
echo "addCudaCompatRunpath: no library path provided" >&2
exit 1
elif [[ $# -gt 1 ]]; then
echo "addCudaCompatRunpath: too many arguments" >&2
exit 1
elif [[ "$1" == "" ]]; then
echo "addCudaCompatRunpath: empty library path" >&2
exit 1
else
libPath="$1"
fi
local i
local foundMatch=

origRpath="$(patchelf --print-rpath "$libPath")"
patchelf --set-rpath "@libcudaPath@:$origRpath" "$libPath"
local -a newArray
for i in "${arrayRef[@]}" ; do
if [[ "$i" == "$pattern" ]] ; then
newArray+=( "$item" )
foundMatch=1
fi
newArray+=( "$i" )
done
if [[ -z "$foundMatch" ]] ; then
newArray+=( "$item" )
fi
arrayRef=( "${newArray[@]}" )
}

postFixupHooks+=("autoFixElfFiles addCudaCompatRunpath")

if [[ -n "@libcudaPath@" ]] ; then
arrayInsertBefore elfPrependRunpaths "@driverLink@/lib" "@libcudaPath@"
fi
Comment on lines +10 to +36
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

related: #385960

Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# shellcheck shell=bash
# Run addDriverRunpath on all dynamically linked ELF files
echo "Sourcing auto-add-driver-runpath-hook"
# Equivalent to running addDriverRunpath on all dynamically linked ELF files

[[ -n ${autoAddDriverRunpath_Once-} ]] && return
declare -g autoAddDriverRunpath_Once=1

echo "Sourcing auto-add-driver-runpath-hook.sh"

if [ -z "${dontUseAutoAddDriverRunpath-}" ]; then
echo "Using autoAddDriverRunpath"
postFixupHooks+=("autoFixElfFiles addDriverRunpath")
elfPrependRunpaths+=( "@driverLink@/lib" )
Comment on lines +2 to +10
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fi
74 changes: 72 additions & 2 deletions pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
# List all dynamically linked ELF files in the outputs and apply a generic fix
# action provided as a parameter (currently used to add the CUDA or the
# cuda_compat driver to the runpath of binaries)
echo "Sourcing cuda/fix-elf-files.sh"

[[ -n ${autoFixElfFiles_Once-} ]] && return
declare -g autoFixElfFiles_Once=1

echo "Sourcing auto-fix-elf-files.sh"

# Returns the exit code of patchelf --print-rpath.
# A return code of 0 (success) means the ELF file has a dynamic section, while
Expand Down Expand Up @@ -55,10 +59,76 @@ autoFixElfFiles() {
elif elfHasDynamicSection "$f"; then
# patchelf returns an error on statically linked ELF files, and in
# practice fixing actions all involve patchelf
echo "autoFixElfFiles: using $fixAction to fix $f" >&2
(( "${NIX_DEBUG:-0}" >= 1 )) && echo "autoFixElfFiles: using $fixAction to fix $f" >&2
$fixAction "$f"
elif (( "${NIX_DEBUG:-0}" >= 1 )); then
echo "autoFixElfFiles: skipping a statically-linked ELF file $f"
fi
done
}

# Copy the contents of the variable named by $1 into the array named by $2.
#
# Arguments:
#   $1 - name of the input variable; either an indexed array or a scalar
#        holding a whitespace-separated list
#   $2 - name of the output array (written through a nameref)
#
# If the input is undeclared, has no value, or is an empty array, the output
# array is left untouched.
inputsToArray() {
    local inputVar="$1"
    local outputVar="$2"
    shift 2

    local -n namerefOut="$outputVar"

    # Ask bash for the declaration instead of probing "${!inputVar+1}": that
    # expansion only looks at element 0 and therefore misses sparse arrays.
    # Declaration and assignment are split so the exit status is not masked.
    local decl
    if ! decl="$(declare -p "$inputVar" 2>/dev/null)"; then
        # Undeclared variable
        return 0
    fi

    # Anchor the match at the start of the declaration so that a scalar whose
    # *value* happens to contain "declare -a" is not mistaken for an array.
    if [[ "$decl" == "declare -a"* ]]; then
        local -n namerefIn="$inputVar"
        if (( ${#namerefIn[@]} > 0 )); then
            namerefOut=( "${namerefIn[@]}" )
        fi
    elif [[ -n "${!inputVar+1}" ]]; then
        # Scalar: split on IFS into the output array
        read -r -a namerefOut <<< "${!inputVar}"
    fi
}

# Turn elfPrependRunpaths / elfAddRunpaths into colon-separated strings.
#
# Reads:  elfPrependRunpaths, elfAddRunpaths (converted with inputsToArray)
# Writes: elfAddRunpathsPrefix, elfAddRunpathsSuffix in the caller's scope.
#         Entries are appended to whatever those variables already hold, and
#         a single leading ':' is stripped afterwards.
elfBuildRunpathStrings() {
    local path
    local -a elfAddRunpathsArray=() elfPrependRunpathsArray=()

    inputsToArray elfAddRunpaths elfAddRunpathsArray
    inputsToArray elfPrependRunpaths elfPrependRunpathsArray

    # The caller may have declared the result variables with a bare `local`,
    # which leaves them unset; normalise to empty strings so that reading
    # them below does not abort under `set -u`.
    elfAddRunpathsPrefix="${elfAddRunpathsPrefix-}"
    elfAddRunpathsSuffix="${elfAddRunpathsSuffix-}"

    for path in "${elfPrependRunpathsArray[@]}"; do
        elfAddRunpathsPrefix="$elfAddRunpathsPrefix:$path"
    done
    elfAddRunpathsPrefix="${elfAddRunpathsPrefix##:}"

    for path in "${elfAddRunpathsArray[@]}"; do
        elfAddRunpathsSuffix="$elfAddRunpathsSuffix:$path"
    done
    elfAddRunpathsSuffix="${elfAddRunpathsSuffix##:}"
}

# Fix action for autoFixElfFiles: rewrite the rpath of the ELF file "$1" to
#   <elfAddRunpathsPrefix>:<original rpath>:<elfAddRunpathsSuffix>
#
# Expects that elfAddRunpathsPrefix and elfAddRunpathsSuffix are set by the
# caller (see elfBuildRunpathStrings); unset is treated as empty.
#
# Arguments: $1 - path to the ELF file
# Returns:   the exit status of patchelf
elfAddRunpathsAction() {
    # Declaration and assignment are split so that a failing
    # `patchelf --print-rpath` is not masked by `local`.
    local origPath
    origPath="$(patchelf --print-rpath "$1")" || return

    # Join only the non-empty parts. Naively concatenating with ':' leaves a
    # leading/trailing/doubled colon when the file has no rpath yet, i.e. an
    # empty search-path entry (which the dynamic loader may resolve relative
    # to the current directory).
    local newPath=""
    local component
    for component in "${elfAddRunpathsPrefix-}" "$origPath" "${elfAddRunpathsSuffix-}"; do
        [[ -n "$component" ]] || continue
        newPath="${newPath:+$newPath:}$component"
    done

    if (( "${NIX_DEBUG:-0}" >= 4 )); then
        echo patchelf --set-rpath "$newPath" "$1" >&2
    fi
    patchelf --set-rpath "$newPath" "$1"
}

# Hook body that applies elfAddRunpathsAction to ELF files via autoFixElfFiles.
# It builds the prefix/suffix strings once, in function-local variables, before
# handing control to autoFixElfFiles.
elfAddRunpathsHook() {
# Nothing to do when both elfAddRunpaths and elfPrependRunpaths expand to an
# empty string.
[[ -z "${elfAddRunpaths[@]}" ]] && [[ -z "${elfPrependRunpaths[@]}" ]] && return

# Log the requested runpaths on stderr; the prepend list is only logged when
# it is non-empty.
echo "Executing elfAddRunpaths: ${elfAddRunpaths[@]}" >&2
[[ -z "${elfPrependRunpaths[@]}" ]] || echo "elfPrependRunpaths: ${elfPrependRunpaths[@]}" >&2

# Declared local here so that the values written by elfBuildRunpathStrings
# (and read by elfAddRunpathsAction) do not outlive this hook invocation.
local elfAddRunpathsPrefix
local elfAddRunpathsSuffix
elfBuildRunpathStrings
autoFixElfFiles elfAddRunpathsAction
# NOTE(review): the text between here and the closing brace is review-page
# content captured with the diff, not part of the function — confirm against
# the real file.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if autoFixElfFiles is only used there, I wonder if it's not simpler to inline it. As much as I liked my higher-order solution 😛 (or, another solution if we want to keep the code nicely modular in those different bash functions, would be to hardcode elfAddRunpathsAction instead of fixAction). Unless you envision autoFixElfFiles to be useful for other things?

}

postFixupHooks+=(elfAddRunpathsHook)
9 changes: 6 additions & 3 deletions pkgs/development/cuda-modules/setup-hooks/extension.nix
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ final: _: {
makeSetupHook
{
name = "auto-add-opengl-runpath-hook";
propagatedBuildInputs = [addDriverRunpath autoFixElfFiles];
propagatedBuildInputs = [autoFixElfFiles];
substitutions = { inherit (addDriverRunpath) driverLink; };
}
./auto-add-driver-runpath-hook.sh
)
Expand All @@ -71,15 +72,17 @@ final: _: {
autoAddCudaCompatRunpath =
final.callPackage
(
{makeSetupHook, autoFixElfFiles, cuda_compat ? null }:
{makeSetupHook, addDriverRunpath, autoFixElfFiles, cuda_compat ? null }:
makeSetupHook
{
name = "auto-add-cuda-compat-runpath-hook";
propagatedBuildInputs = [autoFixElfFiles];

substitutions = {
inherit (addDriverRunpath) driverLink;

# Hotfix Ofborg evaluation
libcudaPath = if final.flags.isJetsonBuild then "${cuda_compat}/compat" else null;
libcudaPath = if final.flags.isJetsonBuild then "${cuda_compat}/compat" else "";
};

meta.broken = !final.flags.isJetsonBuild;
Expand Down
8 changes: 5 additions & 3 deletions pkgs/development/python-modules/torch/bin.nix
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ in buildPythonPackage {
rm -rf $out/bin
'';

postFixup = lib.optionalString stdenv.isLinux ''
addAutoPatchelfSearchPath "$out/${python.sitePackages}/torch/lib"
'';
elfAddRunpaths = [
"${lib.getLib cudaPackages.cuda_nvrtc}/lib"
"$ORIGIN"
];


# The wheel-binary is not stripped to avoid the error of `ImportError: libtorch_cuda_cpp.so: ELF load command address/offset not properly aligned.`.
dontStrip = true;
Expand Down
6 changes: 6 additions & 0 deletions pkgs/development/python-modules/torch/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ in buildPythonPackage rec {
pybind11
pythonRelaxDepsHook
removeReferencesTo
] ++ lib.optionals stdenv.hostPlatform.isLinux [
cudaPackages.autoFixElfFiles
] ++ lib.optionals cudaSupport (with cudaPackages; [
autoAddDriverRunpath
cuda_nvcc
Expand Down Expand Up @@ -488,6 +490,10 @@ in buildPythonPackage rec {
install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
'';

elfAddRunpaths = lib.optionals cudaSupport [
"${lib.getLib cudaPackages.cuda_nvrtc}/lib"
];

# Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
requiredSystemFeatures = [ "big-parallel" ];

Expand Down