From 94bbbb047180b59ef6f25f4d89e7a12b5fc6db63 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Sat, 25 Feb 2023 16:32:55 +0200 Subject: [PATCH 01/13] cudaPackages: point nvcc at a compatible -ccbin This is a hot-fix to un-break cuda-enabled packages (like tensorflow, jaxlib, faiss, opencv, ...) after the gcc11->gcc12 bump. We should probably build the whole downstream packages with a compatible stdenv (such as gcc11Stdenv for cudaPackages_11), but just pointing nvcc at the right compiler seems to do the trick We already used this hack for non-redist cudatoolkit. Now we use it more consistently. This commit also re-links cuda packages against libstdc++ from the same "compatible" gcc, rather than the current stdenv. We didn't test if this is necessary -> need to revise in further PRs. NOTE: long-term we should make it possible to override -ccbin and use e.g. clang --- .../compilers/cudatoolkit/common.nix | 40 +++++++++++++++---- .../redist/build-cuda-redist-package.nix | 14 ++++++- .../cudatoolkit/redist/overrides.nix | 39 +++++++++++++++++- .../libraries/science/math/nccl/default.nix | 31 ++++++++++---- 4 files changed, 106 insertions(+), 18 deletions(-) diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix index e986ae2dc14da..a94f6fbdaf736 100644 --- a/pkgs/development/compilers/cudatoolkit/common.nix +++ b/pkgs/development/compilers/cudatoolkit/common.nix @@ -151,9 +151,31 @@ stdenv.mkDerivation rec { mkdir -p $out/nix-support echo "cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'" >> $out/nix-support/setup-hook - # Set the host compiler to be used by nvcc for CMake-based projects: + # Set the host compiler to be used by nvcc. + # FIXME: redist cuda_nvcc copy-pastes this code + + # For CMake-based projects: # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables - echo "cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin'" >> $out/nix-support/setup-hook + # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html + # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html + + # For non-CMake projects: + # FIXME: results in "incompatible redefinition" warnings ...but we keep + # both this and cmake variables until we come up with a more general + # solution + # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin + + cat <> $out/nix-support/setup-hook + + cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin' + cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${gcc}/bin' + if [ -z "\''${CUDAHOSTCXX-}" ]; then + export CUDAHOSTCXX=${gcc}/bin; + fi + + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${gcc}/bin' + EOF + # Move some libraries to the lib output so that programs that # depend on them don't pull in this entire monstrosity. @@ -167,10 +189,6 @@ stdenv.mkDerivation rec { mv $out/extras/CUPTI/lib64/libcupti* $out/lib ''} - # Set compiler for NVCC. - wrapProgram $out/bin/nvcc \ - --prefix PATH : ${gcc}/bin - # nvprof do not find any program to profile if LD_LIBRARY_PATH is not set wrapProgram $out/bin/nvprof \ --prefix LD_LIBRARY_PATH : $out/lib @@ -191,7 +209,15 @@ stdenv.mkDerivation rec { preFixup = let rpath = lib.concatStringsSep ":" [ (lib.makeLibraryPath (runtimeDependencies ++ [ "$lib" "$out" "$out/nvvm" ])) - "${stdenv.cc.cc.lib}/lib64" + + # The path to libstdc++ and such + # + # NB: + # 1. "gcc" (gcc-wrapper) here is what's exposed as cudaPackages.cudatoolkit.cc + # 2. "gcc.cc" is the unwrapped gcc + # 3. 
"gcc.cc.lib" is one of its outputs + "${gcc.cc.lib}/lib64" + "$out/jre/lib/amd64/jli" "$out/lib64" "$out/nvvm/lib64" diff --git a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix index 9bbd7ea1da119..3bf9184eefabb 100644 --- a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix +++ b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix @@ -10,7 +10,8 @@ attrs: let arch = "linux-x86_64"; -in stdenv.mkDerivation { +in +stdenv.mkDerivation { inherit pname; inherit (attrs) version; @@ -29,7 +30,14 @@ in stdenv.mkDerivation { ]; buildInputs = [ - stdenv.cc.cc.lib + # autoPatchelfHook will search for a libstdc++ and we're giving it a + # "compatible" libstdc++ from the same toolchain that NVCC uses. + # + # E.g. it might happen that stdenv=gcc12Stdenv, but we build against cuda11 + # that only "supports" gcc11. Linking against gcc12's libraries we might + # sometimes actually sometimes encounter dynamic linkage errors at runtime + # NB: We don't actually know if this is the right thing to do + cudatoolkit.cc.cc.lib ]; dontBuild = true; @@ -43,6 +51,8 @@ in stdenv.mkDerivation { runHook postInstall ''; + passthru.stdenv = stdenv; + meta = { description = attrs.name; license = lib.licenses.unfree; diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix index bcf16db6e12eb..663af1db7632b 100644 --- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix +++ b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix @@ -1,6 +1,8 @@ -final: prev: let +final: prev: +let inherit (prev) lib pkgs; -in (lib.filterAttrs (attr: _: (prev ? "${attr}")) { +in +(lib.filterAttrs (attr: _: (prev ? "${attr}")) { ### Overrides to fix the components of cudatoolkit-redist # Attributes that don't exist in the previous set are removed. @@ -20,6 +22,39 @@ in (lib.filterAttrs (attr: _: (prev ? "${attr}")) { prev.libcublas ]; + cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs: + let + inherit (prev.cudatoolkit) cc; + in + { + # Point NVCC at a compatible compiler + # FIXME: non-redist cudatoolkit copy-pastes this code + + # For CMake-based projects: + # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables + # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html + # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html + + # For non-CMake projects: + # We prepend --compiler-bindir to nvcc flags. + # Downstream packages can override these, because NVCC + # uses the last --compiler-bindir it gets on the command line. + # FIXME: this results in "incompatible redefinition" warnings. 
+ # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin + postInstall = (oldAttrs.postInstall or "") + '' + mkdir -p $out/nix-support + cat <> $out/nix-support/setup-hook + cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out' + cmakeFlags+=' -DCUDA_HOST_COMPILER=${cc}/bin' + cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${cc}/bin' + if [ -z "\''${CUDAHOSTCXX-}" ]; then + export CUDAHOSTCXX=${cc}/bin; + fi + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin' + EOF + ''; + }); + cuda_nvprof = prev.cuda_nvprof.overrideAttrs (oldAttrs: { nativeBuildInputs = oldAttrs.nativeBuildInputs ++ [ pkgs.addOpenGLRunpath ]; buildInputs = oldAttrs.buildInputs ++ [ prev.cuda_cupti ]; diff --git a/pkgs/development/libraries/science/math/nccl/default.nix b/pkgs/development/libraries/science/math/nccl/default.nix index 99aed3a6a30ef..df76cda96b1d4 100644 --- a/pkgs/development/libraries/science/math/nccl/default.nix +++ b/pkgs/development/libraries/science/math/nccl/default.nix @@ -1,11 +1,9 @@ { lib, stdenv, fetchFromGitHub, which, cudaPackages, addOpenGLRunpath }: -let - inherit (cudaPackages) cudatoolkit; -in +with cudaPackages; stdenv.mkDerivation rec { - name = "nccl-${version}-cuda-${cudatoolkit.majorVersion}"; + name = "nccl-${version}-cuda-${cudaPackages.cudaMajorVersion}"; version = "2.16.5-1"; src = fetchFromGitHub { @@ -17,16 +15,35 @@ stdenv.mkDerivation rec { outputs = [ "out" "dev" ]; - nativeBuildInputs = [ which addOpenGLRunpath ]; + nativeBuildInputs = [ + which + addOpenGLRunpath + cuda_nvcc + ]; - buildInputs = [ cudatoolkit ]; + buildInputs = [ + cuda_cudart + ]; preConfigure = '' patchShebangs src/collectives/device/gen_rules.sh + '' + # We need NVCC to use a compatible backend compiler (we maintain a link to + # that in `cudatoolkit.cc`). We ship NVCC with a setup-hook that *prepends* + # the correct -ccbin to nvcc flags. NCCL's Makefile, however, appends another + # -ccbin, which points at the host platform's compiler, coming from the + # `stdenv`. Confer + # https://github.com/NVIDIA/nccl/blob/f3d51667838f7542df8ea32ea4e144d812b3ed7c/makefiles/common.mk#L65 + # Since NVCC will use the last -ccbin on the command-line, we append the correct path again. + # We hope it's a temporary solution + + '' + export NVCC_APPEND_FLAGS+=' --compiler-bindir=${cudatoolkit.cc}/bin' ''; makeFlags = [ - "CUDA_HOME=${cudatoolkit}" + "CUDA_HOME=${cuda_nvcc}" + "CUDA_LIB=${cuda_cudart}/lib64" + "CUDA_INC=${cuda_cudart}/include" "PREFIX=$(out)" ]; From cf7fb1d08f928f48725f15e595cbb84793278379 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 27 Feb 2023 14:54:09 +0200 Subject: [PATCH 02/13] python3Packages.tensorflow: add cudaCapabilities argument Rearrange tensorflow to allow overriding cudaCapabilities. This is needed when debugging the tensorflow derivation --- pkgs/development/compilers/cudatoolkit/flags.nix | 3 +-- pkgs/development/python-modules/tensorflow/default.nix | 8 +++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pkgs/development/compilers/cudatoolkit/flags.nix b/pkgs/development/compilers/cudatoolkit/flags.nix index 8e1e54723b2e4..9d7b7f884ad2f 100644 --- a/pkgs/development/compilers/cudatoolkit/flags.nix +++ b/pkgs/development/compilers/cudatoolkit/flags.nix @@ -1,6 +1,6 @@ { config , lib -, cudatoolkit +, cudaVersion }: # Type aliases @@ -13,7 +13,6 @@ let inherit (lib) attrsets lists strings trivial versions; - cudaVersion = cudatoolkit.version; # Flags are determined based on your CUDA toolkit by default. 
You may benefit # from improved performance, reduced file size, or greater hardware suppport by diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix index f7d920c372217..f18a924c31fa2 100644 --- a/pkgs/development/python-modules/tensorflow/default.nix +++ b/pkgs/development/python-modules/tensorflow/default.nix @@ -17,7 +17,9 @@ # that in nix as well. It would make some things easier and less confusing, but # it would also make the default tensorflow package unfree. See # https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/iRCt5m4qUz0 -, cudaSupport ? false, cudaPackages ? {} +, cudaSupport ? false +, cudaPackages ? { } +, cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities , mklSupport ? false, mkl , tensorboardSupport ? true # XLA without CUDA is broken @@ -30,7 +32,7 @@ }: let - inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl; + inherit (cudaPackages) cudatoolkit cudnn nccl; in assert cudaSupport -> cudatoolkit != null @@ -301,7 +303,7 @@ let TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}"; GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin"; GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc"; - TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArches; + TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities; postPatch = '' # bazel 3.3 should work just as well as bazel 3.1 From 79397957e876ef7fe6eccbcb58d23fb5c58f121c Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 27 Feb 2023 16:21:56 +0200 Subject: [PATCH 03/13] cudaPackages.nccl: respect cudaCapabilities --- .../libraries/science/math/nccl/default.nix | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/pkgs/development/libraries/science/math/nccl/default.nix b/pkgs/development/libraries/science/math/nccl/default.nix index df76cda96b1d4..c047961c6c003 100644 --- a/pkgs/development/libraries/science/math/nccl/default.nix +++ b/pkgs/development/libraries/science/math/nccl/default.nix @@ -1,8 +1,18 @@ -{ lib, stdenv, fetchFromGitHub, which, cudaPackages, addOpenGLRunpath }: +{ lib +, backendStdenv +, fetchFromGitHub +, which +, cudaPackages ? { } +, addOpenGLRunpath +}: with cudaPackages; -stdenv.mkDerivation rec { +let + # Output looks like "-gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86" + gencode = lib.concatStringsSep " " cudaFlags.cudaGencode; +in +backendStdenv.mkDerivation rec { name = "nccl-${version}-cuda-${cudaPackages.cudaMajorVersion}"; version = "2.16.5-1"; @@ -27,17 +37,9 @@ stdenv.mkDerivation rec { preConfigure = '' patchShebangs src/collectives/device/gen_rules.sh - '' - # We need NVCC to use a compatible backend compiler (we maintain a link to - # that in `cudatoolkit.cc`). We ship NVCC with a setup-hook that *prepends* - # the correct -ccbin to nvcc flags. NCCL's Makefile, however, appends another - # -ccbin, which points at the host platform's compiler, coming from the - # `stdenv`. Confer - # https://github.com/NVIDIA/nccl/blob/f3d51667838f7542df8ea32ea4e144d812b3ed7c/makefiles/common.mk#L65 - # Since NVCC will use the last -ccbin on the command-line, we append the correct path again. 
- # We hope it's a temporary solution - + '' - export NVCC_APPEND_FLAGS+=' --compiler-bindir=${cudatoolkit.cc}/bin' + makeFlagsArray+=( + "NVCC_GENCODE=${gencode}" + ) ''; makeFlags = [ From e305011223c940a8dd661f64eb5cd5384c15ddbe Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Wed, 1 Mar 2023 16:39:04 +0200 Subject: [PATCH 04/13] cudaPackages_12.nccl: fix new missing inputs --- pkgs/development/libraries/science/math/nccl/default.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkgs/development/libraries/science/math/nccl/default.nix b/pkgs/development/libraries/science/math/nccl/default.nix index c047961c6c003..4f82de8e8b6fa 100644 --- a/pkgs/development/libraries/science/math/nccl/default.nix +++ b/pkgs/development/libraries/science/math/nccl/default.nix @@ -33,6 +33,8 @@ backendStdenv.mkDerivation rec { buildInputs = [ cuda_cudart + ] ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0.0") [ + cuda_cccl ]; preConfigure = '' From d378cc6fb23d67f3d9f86c39051f810c563789ca Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 27 Feb 2023 14:58:14 +0200 Subject: [PATCH 05/13] opencv4: respect config.cudaCapabilities This is needed for faster builds when debugging the opencv derivation, and it's more consistent with other cuda-enabled packages -DCUDA_GENERATION seems to expect architecture names, so we refactor cudaFlags to facilitate easier extraction of the configured archnames --- .../science/math/mxnet/default.nix | 2 +- .../compilers/cudatoolkit/flags.nix | 140 +++++++++++------- pkgs/development/libraries/opencv/4.x.nix | 13 +- .../libraries/science/math/magma/generic.nix | 6 +- .../libraries/science/math/nccl/default.nix | 2 +- .../python-modules/jaxlib/default.nix | 2 +- 6 files changed, 102 insertions(+), 63 deletions(-) diff --git a/pkgs/applications/science/math/mxnet/default.nix b/pkgs/applications/science/math/mxnet/default.nix index c1a329c608864..240a1759397fe 100644 --- a/pkgs/applications/science/math/mxnet/default.nix +++ b/pkgs/applications/science/math/mxnet/default.nix @@ -50,7 +50,7 @@ stdenv.mkDerivation rec { "-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743 "-DCUDA_ARCH_NAME=All" "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc" - "-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}" + "-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.realArches}" ] else [ "-DUSE_CUDA=OFF" ]) ++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF"; diff --git a/pkgs/development/compilers/cudatoolkit/flags.nix b/pkgs/development/compilers/cudatoolkit/flags.nix index 9d7b7f884ad2f..a43485a7dcfd4 100644 --- a/pkgs/development/compilers/cudatoolkit/flags.nix +++ b/pkgs/development/compilers/cudatoolkit/flags.nix @@ -18,8 +18,15 @@ let # from improved performance, reduced file size, or greater hardware suppport by # passing a configuration based on your specific GPU environment. # - # config.cudaCapabilities: list of hardware generations to support (e.g., "8.0") - # config.cudaForwardCompat: bool for compatibility with future GPU generations + # config.cudaCapabilities :: List Capability + # List of hardware generations to build + # Last item is considered the optional forward-compatibility arch + # E.g. [ "8.0" ] + # + # config.cudaForwardCompat :: Bool + # Whether to include the forward compatibility gencode (+PTX) + # to support future GPU generations: + # E.g. 
true # # Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351 @@ -39,6 +46,9 @@ let # GPUs which are supported by the provided CUDA version. supportedGpus = builtins.filter isSupported gpus; + # supportedCapabilities :: List Capability + supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus; + # cudaArchNameToVersions :: AttrSet String (List String) # Maps the name of a GPU architecture to different versions of that architecture. # For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ]. @@ -49,12 +59,6 @@ let (gpu: gpu.archName) supportedGpus; - # cudaArchNames :: List String - # NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here; - # otherwise, we'll get the names sorted in alphabetical order. The JSON array we read them - # from is already sorted, so we'll preserve that order here. - cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus); - # cudaComputeCapabilityToName :: AttrSet String String # Maps the version of a GPU architecture to the name of that architecture. # For example, "8.0" maps to "Ampere". @@ -67,23 +71,6 @@ let supportedGpus ); - # cudaComputeCapabilities :: List String - # NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here; - # otherwise, we'll get the versions sorted in alphabetical order. The JSON array we read them - # from is already sorted, so we'll preserve that order here. - # Use the user-provided list of CUDA capabilities if it's provided. - cudaComputeCapabilities = config.cudaCapabilities - or (lists.map (gpu: gpu.computeCapability) supportedGpus); - - # cudaForwardComputeCapability :: String - cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX"; - - # cudaComputeCapabilitiesAndForward :: List String - # The list of supported CUDA architectures, including the forward compatibility architecture. - # If forward compatibility is disabled, this will be the same as cudaComputeCapabilities. - cudaComputeCapabilitiesAndForward = cudaComputeCapabilities - ++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability; - # dropDot :: String -> String dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver; @@ -101,38 +88,79 @@ let "-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}" ); - # cudaRealArches :: List String - # The real architectures are physical architectures supported by the CUDA version. - # For example, "sm_80". - cudaRealArches = archMapper "sm" cudaComputeCapabilities; - - # cudaVirtualArches :: List String - # The virtual architectures are typically used for forward compatibility, when trying to support - # an architecture newer than the CUDA version allows. - # For example, "compute_80". - cudaVirtualArches = archMapper "compute" cudaComputeCapabilities; - - # cudaArches :: List String - # By default, build for all supported architectures and forward compatibility via a virtual - # architecture for the newest supported architecture. - cudaArches = cudaRealArches ++ - lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches); - - # cudaGencode :: List String - # A list of CUDA gencode arguments to pass to NVCC. 
- cudaGencode = - let - base = gencodeMapper "sm" cudaComputeCapabilities; - forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ]; - in - base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat; + formatCapabilities = { cudaCapabilities, enableForwardCompat ? true }: rec { + inherit cudaCapabilities enableForwardCompat; + + # forwardCapability :: String + # Forward "compute" capability, a.k.a PTX + # E.g. "8.6+PTX" + forwardCapability = (lists.last cudaCapabilities) + "+PTX"; + + # capabilitiesAndForward :: List String + # The list of supported CUDA architectures, including the forward compatibility architecture. + # If forward compatibility is disabled, this will be the same as cudaCapabilities. + # E.g. [ "7.5" "8.6" "8.6+PTX" ] + capabilitiesAndForward = cudaCapabilities ++ lists.optionals enableForwardCompat [ forwardCapability ]; + + # archNames :: List String + # E.g. [ "Turing" "Ampere" ] + archNames = lists.unique (builtins.map (cap: cudaComputeCapabilityToName.${cap}) cudaCapabilities); + + # realArches :: List String + # The real architectures are physical architectures supported by the CUDA version. + # E.g. [ "sm_75" "sm_86" ] + realArches = archMapper "sm" cudaCapabilities; + + # virtualArches :: List String + # The virtual architectures are typically used for forward compatibility, when trying to support + # an architecture newer than the CUDA version allows. + # E.g. [ "compute_75" "compute_86" ] + virtualArches = archMapper "compute" cudaCapabilities; + + # arches :: List String + # By default, build for all supported architectures and forward compatibility via a virtual + # architecture for the newest supported architecture. + # E.g. [ "sm_75" "sm_86" "compute_86" ] + arches = realArches ++ + lists.optional enableForwardCompat (lists.last virtualArches); + + # gencode :: List String + # A list of CUDA gencode arguments to pass to NVCC. + # E.g. [ "-gencode=arch=compute_75,code=sm_75" ... "-gencode=arch=compute_86,code=compute_86" ] + gencode = + let + base = gencodeMapper "sm" cudaCapabilities; + forward = gencodeMapper "compute" [ (lists.last cudaCapabilities) ]; + in + base ++ lib.optionals enableForwardCompat forward; + }; in +# When changing names or formats: pause, validate, and update the assert +assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == { + cudaCapabilities = [ "7.5" "8.6" ]; + enableForwardCompat = true; + + capabilitiesAndForward = [ "7.5" "8.6" "8.6+PTX" ]; + forwardCapability = "8.6+PTX"; + + archNames = [ "Turing" "Ampere" ]; + realArches = [ "sm_75" "sm_86" ]; + virtualArches = [ "compute_75" "compute_86" ]; + arches = [ "sm_75" "sm_86" "compute_86" ]; + + gencode = [ "-gencode=arch=compute_75,code=sm_75" "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_86,code=compute_86" ]; +}; { - inherit - cudaArchNames - cudaArchNameToVersions cudaComputeCapabilityToName - cudaRealArches cudaVirtualArches cudaArches - cudaGencode; - cudaCapabilities = cudaComputeCapabilitiesAndForward; + # formatCapabilities :: { cudaCapabilities: List Capability, cudaForwardCompat: Boolean } -> { ... 
} + inherit formatCapabilities; + + # cudaArchNameToVersions :: String => String + inherit cudaArchNameToVersions; + + # cudaComputeCapabilityToName :: String => String + inherit cudaComputeCapabilityToName; +} // formatCapabilities { + cudaCapabilities = config.cudaCapabilities or supportedCapabilities; + enableForwardCompat = config.cudaForwardCompat or true; } diff --git a/pkgs/development/libraries/opencv/4.x.nix b/pkgs/development/libraries/opencv/4.x.nix index ac021c2b61082..a9f7b0304e84d 100644 --- a/pkgs/development/libraries/opencv/4.x.nix +++ b/pkgs/development/libraries/opencv/4.x.nix @@ -37,7 +37,7 @@ , enableContrib ? true , enableCuda ? (config.cudaSupport or false) && stdenv.hostPlatform.isx86_64 -, cudatoolkit +, cudaPackages ? { } , nvidia-optical-flow-sdk , enableUnfree ? false @@ -79,6 +79,9 @@ }: let + inherit (cudaPackages) cudatoolkit; + inherit (cudaPackages.cudaFlags) cudaCapabilities; + version = "4.7.0"; src = fetchFromGitHub { @@ -342,6 +345,14 @@ stdenv.mkDerivation { "-DCUDA_FAST_MATH=ON" "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc" "-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr" + + # OpenCV respects at least three variables: + # -DCUDA_GENERATION takes a single arch name, e.g. Volta + # -DCUDA_ARCH_BIN takes a semi-colon separated list of real arches, e.g. "8.0;8.6" + # -DCUDA_ARCH_PTX takes the virtual arch, e.g. "8.6" + "-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}" + "-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}" + "-DNVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH=${nvidia-optical-flow-sdk}" ] ++ lib.optionals stdenv.isDarwin [ "-DWITH_OPENCL=OFF" diff --git a/pkgs/development/libraries/science/math/magma/generic.nix b/pkgs/development/libraries/science/math/magma/generic.nix index ab0a2125ec00e..e27107ca15d80 100644 --- a/pkgs/development/libraries/science/math/magma/generic.nix +++ b/pkgs/development/libraries/science/math/magma/generic.nix @@ -37,13 +37,13 @@ let # lists.subtractLists a b = b - a # For CUDA - supportedCudaSmArches = lists.intersectLists cudaFlags.cudaRealArches supportedGpuTargets; + supportedCudaSmArches = lists.intersectLists cudaFlags.realArches supportedGpuTargets; # Subtract the supported SM architectures from the real SM architectures to get the unsupported # SM architectures. - unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.cudaRealArches; + unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.realArches; # For ROCm - # NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.cudaRealArches. + # NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.realArches. # For some reason, Magma's CMakeLists.txt file does not handle the "gfx" prefix, so we must # remove it. 
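   # For illustration only (hypothetical input, not taken from this PR):
   #   [ "gfx906" "gfx908" ] -> [ "906" "908" ]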
rocmArches = lists.map (x: strings.removePrefix "gfx" x) hip.gpuTargets; diff --git a/pkgs/development/libraries/science/math/nccl/default.nix b/pkgs/development/libraries/science/math/nccl/default.nix index 4f82de8e8b6fa..155e863bf21e4 100644 --- a/pkgs/development/libraries/science/math/nccl/default.nix +++ b/pkgs/development/libraries/science/math/nccl/default.nix @@ -10,7 +10,7 @@ with cudaPackages; let # Output looks like "-gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86" - gencode = lib.concatStringsSep " " cudaFlags.cudaGencode; + gencode = lib.concatStringsSep " " cudaFlags.gencode; in backendStdenv.mkDerivation rec { name = "nccl-${version}-cuda-${cudaPackages.cudaMajorVersion}"; diff --git a/pkgs/development/python-modules/jaxlib/default.nix b/pkgs/development/python-modules/jaxlib/default.nix index 2c13defe43838..ad48af827ee56 100644 --- a/pkgs/development/python-modules/jaxlib/default.nix +++ b/pkgs/development/python-modules/jaxlib/default.nix @@ -164,7 +164,7 @@ let build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}" build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}" build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}" - build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}" + build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.realArches}" '' + '' CFG ''; From 5f4bdbe6c387bf740025581d94bbfba9a887c76f Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 27 Feb 2023 16:28:07 +0200 Subject: [PATCH 06/13] python3Packages.tensorflow: fix `GLIBCXX_3.4.30' not found Make tensorflow (and a bunch of ther things) use CUDA-compatible toolchain. Introduces cudaPackages.backendStdenv --- .../compilers/cudatoolkit/common.nix | 54 +++++++------------ .../compilers/cudatoolkit/extension.nix | 19 +++++-- .../redist/build-cuda-redist-package.nix | 11 ++-- .../cudatoolkit/redist/overrides.nix | 3 +- .../libraries/science/math/cudnn/generic.nix | 8 +-- .../science/math/tensorrt/generic.nix | 8 +-- .../python-modules/tensorflow/default.nix | 46 +++++++++++----- .../cuda/cuda-library-samples/generic.nix | 8 +-- 8 files changed, 88 insertions(+), 69 deletions(-) diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix index a94f6fbdaf736..e6d7cbc377cf1 100644 --- a/pkgs/development/compilers/cudatoolkit/common.nix +++ b/pkgs/development/compilers/cudatoolkit/common.nix @@ -11,7 +11,7 @@ args@ , fetchurl , fontconfig , freetype -, gcc +, gcc # :: String , gdk-pixbuf , glib , glibc @@ -22,13 +22,13 @@ args@ , perl , python3 , requireFile -, stdenv +, backendStdenv # E.g. gcc11Stdenv, set in extension.nix , unixODBC , xorg , zlib }: -stdenv.mkDerivation rec { +backendStdenv.mkDerivation rec { pname = "cudatoolkit"; inherit version runPatches; @@ -146,37 +146,24 @@ stdenv.mkDerivation rec { # Fix builds with newer glibc version sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h" - - # Ensure that cmake can find CUDA. + '' + + # Point NVCC at a compatible compiler + # FIXME: redist cuda_nvcc copy-pastes this code + # Refer to comments in the overrides for cuda_nvcc for explanation + # CUDA_TOOLKIT_ROOT_DIR is legacy, + # Cf. 
https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables + '' mkdir -p $out/nix-support - echo "cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'" >> $out/nix-support/setup-hook - - # Set the host compiler to be used by nvcc. - # FIXME: redist cuda_nvcc copy-pastes this code - - # For CMake-based projects: - # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables - # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html - # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html - - # For non-CMake projects: - # FIXME: results in "incompatible redefinition" warnings ...but we keep - # both this and cmake variables until we come up with a more general - # solution - # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin - cat <> $out/nix-support/setup-hook - - cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin' - cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${gcc}/bin' + cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out' + cmakeFlags+=' -DCUDA_HOST_COMPILER=${backendStdenv.cc}/bin' + cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${backendStdenv.cc}/bin' if [ -z "\''${CUDAHOSTCXX-}" ]; then - export CUDAHOSTCXX=${gcc}/bin; + export CUDAHOSTCXX=${backendStdenv.cc}/bin; fi - - export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${gcc}/bin' + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin' EOF - # Move some libraries to the lib output so that programs that # depend on them don't pull in this entire monstrosity. mkdir -p $lib/lib @@ -212,11 +199,10 @@ stdenv.mkDerivation rec { # The path to libstdc++ and such # - # NB: - # 1. "gcc" (gcc-wrapper) here is what's exposed as cudaPackages.cudatoolkit.cc - # 2. "gcc.cc" is the unwrapped gcc - # 3. "gcc.cc.lib" is one of its outputs - "${gcc.cc.lib}/lib64" + # `backendStdenv` is the cuda-compatible toolchain that we pick in + # extension.nix; we hand it to NVCC to use as a back-end, and we link + # cudatoolkit's binaries against its libstdc++ + "${backendStdenv.cc.cc.lib}/lib64" "$out/jre/lib/amd64/jli" "$out/lib64" @@ -286,7 +272,7 @@ stdenv.mkDerivation rec { popd ''; passthru = { - cc = gcc; + cc = backendStdenv.cc; majorMinorVersion = lib.versions.majorMinor version; majorVersion = lib.versions.majorMinor version; }; diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix index c11f12b118a2f..72cab97f8ffc5 100644 --- a/pkgs/development/compilers/cudatoolkit/extension.nix +++ b/pkgs/development/compilers/cudatoolkit/extension.nix @@ -7,11 +7,24 @@ final: prev: let # Version info for the classic cudatoolkit packages that contain everything that is in redist. cudatoolkitVersions = final.lib.importTOML ./versions.toml; + finalVersion = cudatoolkitVersions.${final.cudaVersion}; + + # Exposed as cudaPackages.backendStdenv. + # We don't call it just "stdenv" to avoid confusion: e.g. this toolchain doesn't contain nvcc. + # Instead, it's the back-end toolchain for nvcc to use. + # We also use this to link a compatible libstdc++ (backendStdenv.cc.cc.lib) + # Cf. 
https://github.com/NixOS/nixpkgs/pull/218265 for context + backendStdenv = prev.pkgs."${finalVersion.gcc}Stdenv"; + ### Add classic cudatoolkit package - cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion}); + cudatoolkit = buildCudaToolkitPackage (finalVersion // { inherit backendStdenv; }); cudaFlags = final.callPackage ./flags.nix {}; -in { - inherit cudatoolkit cudaFlags; +in +{ + inherit + backendStdenv + cudatoolkit + cudaFlags; } diff --git a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix index 3bf9184eefabb..1b216ee625a89 100644 --- a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix +++ b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix @@ -1,5 +1,5 @@ { lib -, stdenv +, backendStdenv , fetchurl , autoPatchelfHook , autoAddOpenGLRunpathHook @@ -11,7 +11,7 @@ attrs: let arch = "linux-x86_64"; in -stdenv.mkDerivation { +backendStdenv.mkDerivation { inherit pname; inherit (attrs) version; @@ -33,11 +33,8 @@ stdenv.mkDerivation { # autoPatchelfHook will search for a libstdc++ and we're giving it a # "compatible" libstdc++ from the same toolchain that NVCC uses. # - # E.g. it might happen that stdenv=gcc12Stdenv, but we build against cuda11 - # that only "supports" gcc11. Linking against gcc12's libraries we might - # sometimes actually sometimes encounter dynamic linkage errors at runtime # NB: We don't actually know if this is the right thing to do - cudatoolkit.cc.cc.lib + backendStdenv.cc.cc.lib ]; dontBuild = true; @@ -51,7 +48,7 @@ stdenv.mkDerivation { runHook postInstall ''; - passthru.stdenv = stdenv; + passthru.stdenv = backendStdenv; meta = { description = attrs.name; diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix index 663af1db7632b..96b782d8c990d 100644 --- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix +++ b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix @@ -24,7 +24,7 @@ in cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs: let - inherit (prev.cudatoolkit) cc; + inherit (prev.backendStdenv) cc; in { # Point NVCC at a compatible compiler @@ -44,7 +44,6 @@ in postInstall = (oldAttrs.postInstall or "") + '' mkdir -p $out/nix-support cat <> $out/nix-support/setup-hook - cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out' cmakeFlags+=' -DCUDA_HOST_COMPILER=${cc}/bin' cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${cc}/bin' if [ -z "\''${CUDAHOSTCXX-}" ]; then diff --git a/pkgs/development/libraries/science/math/cudnn/generic.nix b/pkgs/development/libraries/science/math/cudnn/generic.nix index d4e1f641a956e..b2844ae6b074c 100644 --- a/pkgs/development/libraries/science/math/cudnn/generic.nix +++ b/pkgs/development/libraries/science/math/cudnn/generic.nix @@ -1,11 +1,11 @@ { - stdenv, + backendStdenv, lib, zlib, useCudatoolkitRunfile ? false, cudaVersion, cudaMajorVersion, - cudatoolkit, # if cuda>=11: only used for .cc + cudatoolkit, # For cuda < 11 libcublas ? 
null, # cuda <11 doesn't ship redist packages autoPatchelfHook, autoAddOpenGLRunpathHook, @@ -26,7 +26,7 @@ maxCudaVersion, }: assert useCudatoolkitRunfile || (libcublas != null); let - inherit (cudatoolkit) cc; + inherit (backendStdenv) cc; inherit (lib) lists strings trivial versions; # majorMinorPatch :: String -> String @@ -46,7 +46,7 @@ assert useCudatoolkitRunfile || (libcublas != null); let then cudatoolkit else libcublas; in - stdenv.mkDerivation { + backendStdenv.mkDerivation { pname = "cudatoolkit-${cudaMajorVersion}-cudnn"; version = versionTriple; diff --git a/pkgs/development/libraries/science/math/tensorrt/generic.nix b/pkgs/development/libraries/science/math/tensorrt/generic.nix index 3447087051f1e..31090f715c222 100644 --- a/pkgs/development/libraries/science/math/tensorrt/generic.nix +++ b/pkgs/development/libraries/science/math/tensorrt/generic.nix @@ -1,5 +1,5 @@ { lib -, stdenv +, backendStdenv , requireFile , autoPatchelfHook , autoAddOpenGLRunpathHook @@ -18,7 +18,7 @@ assert lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn) "This version of TensorRT requires at least cuDNN ${fileVersionCudnn} (current version is ${cudnn.version})"; -stdenv.mkDerivation rec { +backendStdenv.mkDerivation rec { pname = "cudatoolkit-${cudatoolkit.majorVersion}-tensorrt"; version = fullVersion; src = requireFile rec { @@ -45,7 +45,7 @@ stdenv.mkDerivation rec { # Used by autoPatchelfHook buildInputs = [ - cudatoolkit.cc.cc.lib # libstdc++ + backendStdenv.cc.cc.lib # libstdc++ cudatoolkit cudnn ]; @@ -74,6 +74,8 @@ stdenv.mkDerivation rec { "$out/lib/libnvinfer_builder_resource.so.${mostOfVersion}" ''; + passthru.stdenv = backendStdenv; + meta = with lib; { # Check that the cudatoolkit version satisfies our min/max constraints (both # inclusive). We mark the package as broken if it fails to satisfies the diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix index f18a924c31fa2..adc7b1c1e0b3f 100644 --- a/pkgs/development/python-modules/tensorflow/default.nix +++ b/pkgs/development/python-modules/tensorflow/default.nix @@ -32,6 +32,26 @@ }: let + originalStdenv = stdenv; +in +let + # Tensorflow looks at many toolchain-related variables which may diverge. + # + # Toolchain for cuda-enabled builds. + # We want to achieve two things: + # 1. NVCC should use a compatible back-end (e.g. gcc11 for cuda11) + # 2. Normal C++ files should be compiled with the same toolchain, + # to avoid potential weird dynamic linkage errors at runtime. + # This may not be necessary though + # + # Toolchain for Darwin: + # clang 7 fails to emit a symbol for + # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the + # translation units, so the build fails at link time + stdenv = + if cudaSupport then cudaPackages.backendStdenv + else if originalStdenv.isDarwin then llvmPackages_11.stdenv + else originalStdenv; inherit (cudaPackages) cudatoolkit cudnn nccl; in @@ -44,6 +64,7 @@ assert ! (stdenv.isDarwin && cudaSupport); let withTensorboard = (pythonOlder "3.6") || tensorboardSupport; + # FIXME: migrate to redist cudaPackages cudatoolkit_joined = symlinkJoin { name = "${cudatoolkit.name}-merged"; paths = [ @@ -56,10 +77,13 @@ let ]; }; + # Tensorflow expects bintools at hard-coded paths, e.g. 
/usr/bin/ar + # The only way to overcome that is to set GCC_HOST_COMPILER_PREFIX, + # but that path must contain cc as well, so we merge them cudatoolkit_cc_joined = symlinkJoin { - name = "${cudatoolkit.cc.name}-merged"; + name = "${stdenv.cc.name}-merged"; paths = [ - cudatoolkit.cc + stdenv.cc binutils.bintools # for ar, dwp, nm, objcopy, objdump, strip ]; }; @@ -175,12 +199,7 @@ let ''; }) else _bazel-build; - _bazel-build = (buildBazelPackage.override (lib.optionalAttrs stdenv.isDarwin { - # clang 7 fails to emit a symbol for - # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the - # translation units, so the build fails at link time - stdenv = llvmPackages_11.stdenv; - })) { + _bazel-build = buildBazelPackage.override { inherit stdenv; } { name = "${pname}-${version}"; bazel = bazel_5; @@ -211,12 +230,13 @@ let flatbuffers-core giflib grpc - icu + # Necessary to fix the "`GLIBCXX_3.4.30' not found" error + (icu.override { inherit stdenv; }) jsoncpp libjpeg_turbo libpng lmdb-core - pybind11 + (pybind11.overridePythonAttrs (_: { inherit stdenv; })) snappy sqlite ] ++ lib.optionals cudaSupport [ @@ -301,10 +321,12 @@ let TF_NEED_CUDA = tfFeature cudaSupport; TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}"; - GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin"; - GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc"; TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities; + # Needed even when we override stdenv: e.g. for ar + GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin"; + GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/cc"; + postPatch = '' # bazel 3.3 should work just as well as bazel 3.1 rm -f .bazelversion diff --git a/pkgs/test/cuda/cuda-library-samples/generic.nix b/pkgs/test/cuda/cuda-library-samples/generic.nix index e01664bab3191..e9a481c94a7a4 100644 --- a/pkgs/test/cuda/cuda-library-samples/generic.nix +++ b/pkgs/test/cuda/cuda-library-samples/generic.nix @@ -1,4 +1,4 @@ -{ lib, stdenv, fetchFromGitHub +{ lib, backendStdenv, fetchFromGitHub , cmake, addOpenGLRunpath , cudatoolkit , cutensor @@ -35,13 +35,13 @@ let in { - cublas = stdenv.mkDerivation (commonAttrs // { + cublas = backendStdenv.mkDerivation (commonAttrs // { pname = "cuda-library-samples-cublas"; src = "${src}/cuBLASLt"; }); - cusolver = stdenv.mkDerivation (commonAttrs // { + cusolver = backendStdenv.mkDerivation (commonAttrs // { pname = "cuda-library-samples-cusolver"; src = "${src}/cuSOLVER"; @@ -49,7 +49,7 @@ in sourceRoot = "cuSOLVER/gesv"; }); - cutensor = stdenv.mkDerivation (commonAttrs // { + cutensor = backendStdenv.mkDerivation (commonAttrs // { pname = "cuda-library-samples-cutensor"; src = "${src}/cuTENSOR"; From 17248123b6ae01b89a25de730ea890276acd69b2 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 28 Feb 2023 18:07:45 +0200 Subject: [PATCH 07/13] cudaPackages_12: use gcc12 --- pkgs/development/compilers/cudatoolkit/versions.toml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pkgs/development/compilers/cudatoolkit/versions.toml b/pkgs/development/compilers/cudatoolkit/versions.toml index 7e9fcae3271ac..a201a4a263f5e 100644 --- a/pkgs/development/compilers/cudatoolkit/versions.toml +++ b/pkgs/development/compilers/cudatoolkit/versions.toml @@ -76,8 +76,4 @@ gcc = "gcc11" version = "12.0.1" url = 
"https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda_12.0.1_525.85.12_linux.run" sha256 = "sha256-GyBaBicvFGP0dydv2rkD8/ZmkXwGjlIHOAAeacehh1s=" -# CUDA 12 is compatible with gcc12, but nixpkgs default gcc is still on gcc11 as -# of 2023-01-08. See https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements. -# This should be upgraded to gcc12 once nixpkgs default gcc is upgraded. Other -# CUDA versions should likely have their gcc versions upgraded as well. -gcc = "gcc11" +gcc = "gcc12" From 2b69d618c28bdcbc822843a534c2cb74542ec972 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Wed, 1 Mar 2023 23:52:07 +0200 Subject: [PATCH 08/13] opencv3: respect config.cudaCapabilities --- pkgs/development/libraries/opencv/3.x.nix | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pkgs/development/libraries/opencv/3.x.nix b/pkgs/development/libraries/opencv/3.x.nix index e1a13c9fe573f..75d8c712df1f6 100644 --- a/pkgs/development/libraries/opencv/3.x.nix +++ b/pkgs/development/libraries/opencv/3.x.nix @@ -15,8 +15,8 @@ , enableContrib ? true , enableCuda ? (config.cudaSupport or false) && - stdenv.hostPlatform.isx86_64, cudatoolkit - + stdenv.hostPlatform.isx86_64 +, cudaPackages ? { } , enableUnfree ? false , enableIpp ? false , enablePython ? false, pythonPackages ? null @@ -40,6 +40,9 @@ assert blas.implementation == "openblas" && lapack.implementation == "openblas"; assert enablePython -> pythonPackages != null; let + inherit (cudaPackages) cudatoolkit; + inherit (cudaPackages.cudaFlags) cudaCapabilities; + version = "3.4.18"; src = fetchFromGitHub { @@ -242,6 +245,8 @@ stdenv.mkDerivation { "-DCUDA_FAST_MATH=ON" "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc" "-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr" + "-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}" + "-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}" ] ++ lib.optionals stdenv.isDarwin [ "-DWITH_OPENCL=OFF" "-DWITH_LAPACK=OFF" From c376c54f70b91c68f6f2ddc90838b57a82b12ecd Mon Sep 17 00:00:00 2001 From: Someone Date: Thu, 2 Mar 2023 17:47:47 +0000 Subject: [PATCH 09/13] cudaPackages.cudatoolkit: refactor inheriting passthru.cc Co-authored-by: Connor Baker --- pkgs/development/compilers/cudatoolkit/common.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix index e6d7cbc377cf1..1195f7be7de63 100644 --- a/pkgs/development/compilers/cudatoolkit/common.nix +++ b/pkgs/development/compilers/cudatoolkit/common.nix @@ -272,7 +272,7 @@ backendStdenv.mkDerivation rec { popd ''; passthru = { - cc = backendStdenv.cc; + inherit (backendStdenv) cc; majorMinorVersion = lib.versions.majorMinor version; majorVersion = lib.versions.majorMinor version; }; From 8bf5f5ac893ff07406a3a1979d944c2a86cfc887 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Fri, 3 Mar 2023 02:19:50 +0200 Subject: [PATCH 10/13] magma: use CMAKE_CUDA_ARCHITECTURES directly --- .../compilers/cudatoolkit/flags.nix | 3 ++ .../libraries/science/math/magma/generic.nix | 42 ++++++++++++------- .../libraries/science/math/magma/releases.nix | 31 +------------- 3 files changed, 30 insertions(+), 46 deletions(-) diff --git a/pkgs/development/compilers/cudatoolkit/flags.nix b/pkgs/development/compilers/cudatoolkit/flags.nix index a43485a7dcfd4..b65219369404f 100644 --- a/pkgs/development/compilers/cudatoolkit/flags.nix +++ b/pkgs/development/compilers/cudatoolkit/flags.nix @@ -160,6 +160,9 
@@ assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == { # cudaComputeCapabilityToName :: String => String inherit cudaComputeCapabilityToName; + + # dropDot :: String -> String + inherit dropDot; } // formatCapabilities { cudaCapabilities = config.cudaCapabilities or supportedCapabilities; enableForwardCompat = config.cudaForwardCompat or true; diff --git a/pkgs/development/libraries/science/math/magma/generic.nix b/pkgs/development/libraries/science/math/magma/generic.nix index e27107ca15d80..f61f1877019b2 100644 --- a/pkgs/development/libraries/science/math/magma/generic.nix +++ b/pkgs/development/libraries/science/math/magma/generic.nix @@ -11,7 +11,8 @@ , cudaSupport ? true , fetchurl , gfortran -, gpuTargets ? [ ] +, cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities +, gpuTargets ? [ ] # Non-CUDA targets, that is HIP , hip , hipblas , hipsparse @@ -36,12 +37,6 @@ let # of the first list *from* the second list. That means: # lists.subtractLists a b = b - a - # For CUDA - supportedCudaSmArches = lists.intersectLists cudaFlags.realArches supportedGpuTargets; - # Subtract the supported SM architectures from the real SM architectures to get the unsupported - # SM architectures. - unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.realArches; - # For ROCm # NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.realArches. # For some reason, Magma's CMakeLists.txt file does not handle the "gfx" prefix, so we must @@ -62,19 +57,32 @@ let ) supported; - # Create the gpuTargetString. gpuTargetString = strings.concatStringsSep "," ( if gpuTargets != [ ] then # If gpuTargets is specified, it always takes priority. gpuArchWarner supportedCustomGpuTargets unsupportedCustomGpuTargets - else if cudaSupport then - gpuArchWarner supportedCudaSmArches unsupportedCudaSmArches else if rocmSupport then gpuArchWarner supportedRocmArches unsupportedRocmArches + else if cudaSupport then + [ ] # It's important we pass explicit -DGPU_TARGET to reset magma's defaults else throw "No GPU targets specified" ); + # E.g. [ "80" "86" "90" ] + cudaArchitectures = (builtins.map cudaFlags.dropDot cudaCapabilities); + + cudaArchitecturesString = strings.concatStringsSep ";" cudaArchitectures; + minArch = + let + minArch' = builtins.head (builtins.sort builtins.lessThan cudaArchitectures); + in + # If this fails some day, something must've changed and we should re-validate our assumptions + assert builtins.stringLength minArch' == 2; + # "75" -> "750" Cf. https://bitbucket.org/icl/magma/src/f4ec79e2c13a2347eff8a77a3be6f83bc2daec20/CMakeLists.txt#lines-273 + "${minArch'}0"; + + cuda_joined = symlinkJoin { name = "cuda-redist-${cudaVersion}"; paths = with cudaPackages; [ @@ -87,6 +95,8 @@ let }; in +assert (builtins.match "[^[:space:]]*" gpuTargetString) != null; + stdenv.mkDerivation { pname = "magma"; inherit version; @@ -116,7 +126,11 @@ stdenv.mkDerivation { openmp ]; - cmakeFlags = lists.optionals cudaSupport [ + cmakeFlags = [ + "-DGPU_TARGET=${gpuTargetString}" + ] ++ lists.optionals cudaSupport [ + "-DCMAKE_CUDA_ARCHITECTURES=${cudaArchitecturesString}" + "-DMIN_ARCH=${minArch}" # Disarms magma's asserts "-DCMAKE_C_COMPILER=${cudatoolkit.cc}/bin/cc" "-DCMAKE_CXX_COMPILER=${cudatoolkit.cc}/bin/c++" "-DMAGMA_ENABLE_CUDA=ON" @@ -126,14 +140,10 @@ stdenv.mkDerivation { "-DMAGMA_ENABLE_HIP=ON" ]; - # NOTE: We must set GPU_TARGET in preConfigure in this way because it may contain spaces. 
- preConfigure = '' - cmakeFlagsArray+=("-DGPU_TARGET=${gpuTargetString}") - '' # NOTE: The stdenv's CXX is used when compiling the CMake test to determine the version of # CUDA available. This isn't necessarily the same as cudatoolkit.cc, so we must set # CUDAHOSTCXX. - + strings.optionalString cudaSupport '' + preConfigure = strings.optionalString cudaSupport '' export CUDAHOSTCXX=${cudatoolkit.cc}/bin/c++ ''; diff --git a/pkgs/development/libraries/science/math/magma/releases.nix b/pkgs/development/libraries/science/math/magma/releases.nix index 3d08aa95d4d18..029f418edce3c 100644 --- a/pkgs/development/libraries/science/math/magma/releases.nix +++ b/pkgs/development/libraries/science/math/magma/releases.nix @@ -1,27 +1,13 @@ # NOTE: Order matters! Put the oldest version first, and the newest version last. # NOTE: Make sure the supportedGpuTargets are in order of oldest to newest. # You can update the supportedGpuTargets by looking at the CMakeLists.txt file. -# CUDA starts here: https://bitbucket.org/icl/magma/src/f4ec79e2c13a2347eff8a77a3be6f83bc2daec20/CMakeLists.txt#lines-175 # HIP is here: https://bitbucket.org/icl/magma/src/f4ec79e2c13a2347eff8a77a3be6f83bc2daec20/CMakeLists.txt#lines-386 +# CUDA works around magma's wrappers and uses FindCUDAToolkit directly [ { version = "2.6.2"; hash = "sha256-dbVU2rAJA+LRC5cskT5Q5/iMvGLzrkMrWghsfk7aCnE="; supportedGpuTargets = [ - "sm_20" - "sm_30" - "sm_35" - "sm_37" - "sm_50" - "sm_52" - "sm_53" - "sm_60" - "sm_61" - "sm_62" - "sm_70" - "sm_71" - "sm_75" - "sm_80" "700" "701" "702" @@ -53,21 +39,6 @@ version = "2.7.1"; hash = "sha256-2chxHAR6OMrhbv3nS+4uszMyF/0nEeHpuGBsu7SuGlA="; supportedGpuTargets = [ - "sm_20" - "sm_30" - "sm_35" - "sm_37" - "sm_50" - "sm_52" - "sm_53" - "sm_60" - "sm_61" - "sm_62" - "sm_70" - "sm_71" - "sm_75" - "sm_80" - "sm_90" "700" "701" "702" From dd2b27692e8a32316d263b938bddfa515eb2775a Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Fri, 3 Mar 2023 03:23:40 +0200 Subject: [PATCH 11/13] magma: explain `cudaSupport ? true` --- pkgs/development/libraries/science/math/magma/generic.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkgs/development/libraries/science/math/magma/generic.nix b/pkgs/development/libraries/science/math/magma/generic.nix index f61f1877019b2..c997fcc090133 100644 --- a/pkgs/development/libraries/science/math/magma/generic.nix +++ b/pkgs/development/libraries/science/math/magma/generic.nix @@ -8,6 +8,11 @@ { blas , cmake , cudaPackages + # FIXME: cuda being unfree means ofborg won't eval "magma". + # respecting config.cudaSupport -> false by default + # -> ofborg eval -> throws "no GPU targets specified". + # Probably should delete everything but "magma-cuda" and "magma-hip" + # from all-packages.nix , cudaSupport ? 
true , fetchurl , gfortran From 0c25f5aa7ffb4ed9d4015fc273f51d08ff2a279b Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Fri, 3 Mar 2023 03:42:58 +0200 Subject: [PATCH 12/13] cudaPackages.cudatoolkit: remove unused gcc argument --- pkgs/development/compilers/cudatoolkit/common.nix | 1 - pkgs/development/compilers/cudatoolkit/extension.nix | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix index 1195f7be7de63..fb3b50b981504 100644 --- a/pkgs/development/compilers/cudatoolkit/common.nix +++ b/pkgs/development/compilers/cudatoolkit/common.nix @@ -11,7 +11,6 @@ args@ , fetchurl , fontconfig , freetype -, gcc # :: String , gdk-pixbuf , glib , glibc diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix index 72cab97f8ffc5..dd6f7ff2abe7b 100644 --- a/pkgs/development/compilers/cudatoolkit/extension.nix +++ b/pkgs/development/compilers/cudatoolkit/extension.nix @@ -17,7 +17,12 @@ final: prev: let backendStdenv = prev.pkgs."${finalVersion.gcc}Stdenv"; ### Add classic cudatoolkit package - cudatoolkit = buildCudaToolkitPackage (finalVersion // { inherit backendStdenv; }); + cudatoolkit = + let + attrs = builtins.removeAttrs finalVersion [ "gcc" ]; + attrs' = attrs // { inherit backendStdenv; }; + in + buildCudaToolkitPackage attrs'; cudaFlags = final.callPackage ./flags.nix {}; From ac64f07f9c8b9bcc4a4b6d285146cd50473d6b5d Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Fri, 3 Mar 2023 12:29:11 +0200 Subject: [PATCH 13/13] cudaPackages.cudaFlags: drop unused capabilitiesAndForward --- .../compilers/cudatoolkit/flags.nix | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/pkgs/development/compilers/cudatoolkit/flags.nix b/pkgs/development/compilers/cudatoolkit/flags.nix index b65219369404f..989fdb06c5dfb 100644 --- a/pkgs/development/compilers/cudatoolkit/flags.nix +++ b/pkgs/development/compilers/cudatoolkit/flags.nix @@ -19,13 +19,14 @@ let # passing a configuration based on your specific GPU environment. # # config.cudaCapabilities :: List Capability - # List of hardware generations to build - # Last item is considered the optional forward-compatibility arch + # List of hardware generations to build. # E.g. [ "8.0" ] + # Currently, the last item is considered the optional forward-compatibility arch, + # but this may change in the future. # # config.cudaForwardCompat :: Bool # Whether to include the forward compatibility gencode (+PTX) - # to support future GPU generations: + # to support future GPU generations. # E.g. true # # Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351 @@ -91,17 +92,6 @@ let formatCapabilities = { cudaCapabilities, enableForwardCompat ? true }: rec { inherit cudaCapabilities enableForwardCompat; - # forwardCapability :: String - # Forward "compute" capability, a.k.a PTX - # E.g. "8.6+PTX" - forwardCapability = (lists.last cudaCapabilities) + "+PTX"; - - # capabilitiesAndForward :: List String - # The list of supported CUDA architectures, including the forward compatibility architecture. - # If forward compatibility is disabled, this will be the same as cudaCapabilities. - # E.g. [ "7.5" "8.6" "8.6+PTX" ] - capabilitiesAndForward = cudaCapabilities ++ lists.optionals enableForwardCompat [ forwardCapability ]; - # archNames :: List String # E.g. 
[ "Turing" "Ampere" ] archNames = lists.unique (builtins.map (cap: cudaComputeCapabilityToName.${cap}) cudaCapabilities); @@ -141,9 +131,6 @@ assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == { cudaCapabilities = [ "7.5" "8.6" ]; enableForwardCompat = true; - capabilitiesAndForward = [ "7.5" "8.6" "8.6+PTX" ]; - forwardCapability = "8.6+PTX"; - archNames = [ "Turing" "Ampere" ]; realArches = [ "sm_75" "sm_86" ]; virtualArches = [ "compute_75" "compute_86" ];