Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix pytorchWithCuda, fix cupy, upgrade cudnn #166784

Merged
merged 13 commits into from
Apr 3, 2022
4 changes: 4 additions & 0 deletions pkgs/development/compilers/cudatoolkit/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,9 @@ rec {
gcc = gcc10; # can bump to 11 along with stdenv.cc
};

# Make sure to only ever update this to a version that is compatible with the
# latest cudnn, nccl, cutensor, etc! It sometimes happens that CUDA versions
# are released prior to compatibility with the rest of the ecosystem. And
# don't forget to request a review from @NixOS/cuda-maintainers!
cudatoolkit_11 = cudatoolkit_11_5;
}
40 changes: 18 additions & 22 deletions pkgs/development/libraries/science/math/cudnn/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -81,31 +81,27 @@ rec {
cudnn_8_1_cudatoolkit_11_2 = cudnn_8_1_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_2; };

cudnn_8_1_cudatoolkit_10 = cudnn_8_1_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_10; };
cudnn_8_1_cudatoolkit_11 = cudnn_8_1_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11; };

# cuDNN 8.3 is necessary for the latest jaxlib, esp. jaxlib-bin. See
# https://github.com/google/jax/discussions/9455 for more info.
cudnn_8_3_cudatoolkit_10_2 =
generic
rec {
version = "8.3.2";
cudatoolkit = cudatoolkit_10_2;
# See https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-832/support-matrix/index.html#cudnn-cuda-hardware-versions.
minCudaVersion = "10.2.00000";
maxCudaVersion = "11.5.99999";
mkSrc = cudatoolkit:
let v = if lib.versions.majorMinor cudatoolkit.version == "10.2" then "10.2" else "11.5"; in
fetchurl {
# Starting at version 8.3.1 there's a new directory layout including
# a subdirectory `local_installers`.
url = "https://developer.download.nvidia.com/compute/redist/cudnn/v${version}/local_installers/${v}/cudnn-linux-x86_64-8.3.2.44_cuda${v}-archive.tar.xz";
hash = {
"10.2" = "sha256-1vVu+cqM+PketzIQumw9ykm6REbBZhv6/lXB7EC2aaw=";
"11.5" = "sha256-VQCVPAjF5dHd3P2iNPnvvdzb5DpTsm3AqCxyP6FwxFc=";
}."${v}";
};
}
;
cudnn_8_3_cudatoolkit_10_2 = generic rec {
version = "8.3.2";
cudatoolkit = cudatoolkit_10_2;
# See https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-832/support-matrix/index.html#cudnn-cuda-hardware-versions.
minCudaVersion = "10.2.00000";
maxCudaVersion = "11.5.99999";
mkSrc = cudatoolkit:
let v = if lib.versions.majorMinor cudatoolkit.version == "10.2" then "10.2" else "11.5"; in
fetchurl {
# Starting at version 8.3.1 there's a new directory layout including
# a subdirectory `local_installers`.
url = "https://developer.download.nvidia.com/compute/redist/cudnn/v${version}/local_installers/${v}/cudnn-linux-x86_64-8.3.2.44_cuda${v}-archive.tar.xz";
hash = {
"10.2" = "sha256-1vVu+cqM+PketzIQumw9ykm6REbBZhv6/lXB7EC2aaw=";
"11.5" = "sha256-VQCVPAjF5dHd3P2iNPnvvdzb5DpTsm3AqCxyP6FwxFc=";
}."${v}";
};
};
Comment on lines +87 to +104
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is purely a matter of formatting.

cudnn_8_3_cudatoolkit_11_0 = cudnn_8_3_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_0; };
cudnn_8_3_cudatoolkit_11_1 = cudnn_8_3_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_1; };
cudnn_8_3_cudatoolkit_11_2 = cudnn_8_3_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_2; };
Expand Down
47 changes: 23 additions & 24 deletions pkgs/development/libraries/science/math/cutensor/default.nix
Original file line number Diff line number Diff line change
@@ -1,25 +1,33 @@
{ callPackage
, cudatoolkit_10_1, cudatoolkit_10_2
, cudatoolkit_11_0, cudatoolkit_11_1, cudatoolkit_11_2, cudatoolkit_11_3, cudatoolkit_11_4
, cudatoolkit_10_1
, cudatoolkit_10_2
, cudatoolkit_11
, cudatoolkit_11_0
, cudatoolkit_11_1
, cudatoolkit_11_2
, cudatoolkit_11_3
, cudatoolkit_11_4
, cudatoolkit_11_5
, cudatoolkit_11_6
}:

rec {
cutensor_cudatoolkit_10_1 = callPackage ./generic.nix rec {
version = "1.2.2.5";
libPath = "lib/10.1";
cudatoolkit = cudatoolkit_10_1;
# 1.2.2 is compatible with CUDA 11.0, 11.1, and 11.2:
# ephemeral doc at https://developer.nvidia.com/cutensor/downloads
sha256 = "1dl9bd71frhac9cb8lvnh71zfsnqxbxbfhndvva2zf6nh0my4klm";
# 1.2.2 is compatible with CUDA 10.1, 10.2, and 11.x.
# See https://docs.nvidia.com/cuda/cutensor/release_notes.html#cutensor-v1-2-2.
hash = "sha256-lU7iK4DWuC/U3s1Ct/rq2Gr3w4F2U7RYYgpmF05bibY=";
};

cutensor_cudatoolkit_10_2 = cutensor_cudatoolkit_10_1.override {
version = "1.3.1.3";
libPath = "lib/10.2";
cudatoolkit = cudatoolkit_10_2;
# 1.3.1 is compatible with CUDA 11.0, 11.1, and 11.2:
# ephemeral doc at https://developer.nvidia.com/cutensor/downloads
sha256 = "sha256-mNlVnabB2IC3HnYY0mb06RLqQzDxN9ePGVeBy3hkBC8=";
# 1.3.1 is compatible with CUDA 10.2 and 11.x.
# See https://docs.nvidia.com/cuda/cutensor/release_notes.html#cutensor-v1-3-1.
hash = "sha256-mNlVnabB2IC3HnYY0mb06RLqQzDxN9ePGVeBy3hkBC8=";
};

cutensor_cudatoolkit_10 = cutensor_cudatoolkit_10_2;
Expand All @@ -29,21 +37,12 @@ rec {
cudatoolkit = cudatoolkit_11_0;
};

cutensor_cudatoolkit_11_1 = cutensor_cudatoolkit_11_0.override {
cudatoolkit = cudatoolkit_11_1;
};

cutensor_cudatoolkit_11_2 = cutensor_cudatoolkit_11_0.override {
cudatoolkit = cudatoolkit_11_2;
};

cutensor_cudatoolkit_11_3 = cutensor_cudatoolkit_11_0.override {
cudatoolkit = cudatoolkit_11_3;
};

cutensor_cudatoolkit_11_4 = cutensor_cudatoolkit_11_0.override {
cudatoolkit = cudatoolkit_11_4;
};
cutensor_cudatoolkit_11_1 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_1; };
cutensor_cudatoolkit_11_2 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_2; };
cutensor_cudatoolkit_11_3 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_3; };
cutensor_cudatoolkit_11_4 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_4; };
cutensor_cudatoolkit_11_5 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_5; };
cutensor_cudatoolkit_11_6 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_6; };

cutensor_cudatoolkit_11 = cutensor_cudatoolkit_11_4;
cutensor_cudatoolkit_11 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11; };
}
4 changes: 2 additions & 2 deletions pkgs/development/libraries/science/math/cutensor/generic.nix
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
, addOpenGLRunpath

, version
, sha256
, hash
}:

let
Expand All @@ -21,7 +21,7 @@ stdenv.mkDerivation {

src = fetchurl {
url = "https://developer.download.nvidia.com/compute/cutensor/${mostOfVersion}/local_installers/libcutensor-${stdenv.hostPlatform.parsed.kernel.name}-${stdenv.hostPlatform.parsed.cpu.name}-${version}.tar.gz";
inherit sha256;
inherit hash;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why hash vs sha256?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Iiuc hash is preferred to sha256 as it is more future proof, but I don't remember the reference for that

Copy link
Contributor

@SomeoneSerge SomeoneSerge Apr 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

# fetchurl/default.nix

, # SRI hash.
  hash ? ""

, # Legacy ways of specifying the hash.
  outputHash ? ""
, outputHashAlgo ? ""
, md5 ? ""
, sha1 ? ""
, sha256 ? ""
, sha512 ? ""

Hmm, I guess

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, we are switching everywhere (though slowly) to SRI hashes.

};

outputs = [ "out" "dev" ];
Expand Down
4 changes: 4 additions & 0 deletions pkgs/development/libraries/science/math/nccl/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ stdenv.mkDerivation rec {

enableParallelBuilding = true;

passthru = {
inherit cudatoolkit;
};
Comment on lines +41 to +43
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is necessary for the assert in cupy/default.nix.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍🏻


meta = with lib; {
description = "Multi-GPU and multi-node collective communication primitives for NVIDIA GPUs";
homepage = "https://developer.nvidia.com/nccl";
Expand Down
11 changes: 11 additions & 0 deletions pkgs/development/python-modules/cupy/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
, addOpenGLRunpath
}:

assert cudnn.cudatoolkit == cudatoolkit;
assert cutensor.cudatoolkit == cudatoolkit;
assert nccl.cudatoolkit == cudatoolkit;
Comment on lines +8 to +10
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was discovered here that cupy was accidentally pulling in multiple cudatoolkit versions. These asserts should prevent that issue going forward.


buildPythonPackage rec {
pname = "cupy";
version = "10.2.0";
Expand All @@ -15,8 +19,15 @@ buildPythonPackage rec {
sha256 = "sha256-5ovvA76QGOsOnVztMfDgLerks5nJrKR08rLc+ArmWA8=";
};

# See https://docs.cupy.dev/en/v10.2.0/reference/environment.html. Setting both
# CUPY_NUM_BUILD_JOBS and CUPY_NUM_NVCC_THREADS to NIX_BUILD_CORES results in
# a small amount of thrashing but it turns out there are a large number of
# very short builds and a few extremely long ones, so setting both ends up
# working nicely in practice.
preConfigure = ''
export CUDA_PATH=${cudatoolkit}
export CUPY_NUM_BUILD_JOBS="$NIX_BUILD_CORES"
export CUPY_NUM_NVCC_THREADS="$NIX_BUILD_CORES"
'';

nativeBuildInputs = [
Expand Down
10 changes: 7 additions & 3 deletions pkgs/top-level/all-packages.nix
Original file line number Diff line number Diff line change
Expand Up @@ -4647,7 +4647,6 @@ with pkgs;
cudnn_8_1_cudatoolkit_11_1
cudnn_8_1_cudatoolkit_11_2
cudnn_8_1_cudatoolkit_10
cudnn_8_1_cudatoolkit_11
cudnn_8_3_cudatoolkit_10_2
cudnn_8_3_cudatoolkit_11_0
cudnn_8_3_cudatoolkit_11_1
Expand All @@ -4658,8 +4657,8 @@ with pkgs;
cudnn_8_3_cudatoolkit_10
cudnn_8_3_cudatoolkit_11;

# TODO(samuela): This is old and should be upgraded to 8.3 at some point.
cudnn = cudnn_7_6_cudatoolkit_10_1;
# Make sure to keep this in sync with the `cudatoolkit` version!
cudnn = cudnn_8_3_cudatoolkit_10;

cutensorPackages = callPackages ../development/libraries/science/math/cutensor { };
inherit (cutensorPackages)
Expand Down Expand Up @@ -33022,11 +33021,16 @@ with pkgs;
### SCIENCE / MATH

caffe = callPackage ../applications/science/math/caffe ({
cudaSupport = config.cudaSupport or false;
cudatoolkit = cudatoolkit_10_1;
cudnn = cudnn_7_6_cudatoolkit_10_1;
opencv3 = opencv3WithoutCuda; # Used only for image loading.
blas = openblas;
inherit (darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo;
} // (config.caffe or {}));

caffeWithCuda = caffe.override { cudaSupport = true; };

caffe2 = callPackage ../development/libraries/science/math/caffe2 (rec {
inherit (python3Packages) python future six numpy pydot;
protobuf = protobuf3_1;
Expand Down
25 changes: 24 additions & 1 deletion pkgs/top-level/python-packages.nix
Original file line number Diff line number Diff line change
Expand Up @@ -1459,6 +1459,11 @@ in {
inherit (self) python numpy boost;
});

caffeWithCuda = toPythonModule (pkgs.caffeWithCuda.override {
pythonSupport = true;
inherit (self) python numpy boost;
});

cairocffi = callPackage ../development/python-modules/cairocffi { };

cairosvg = callPackage ../development/python-modules/cairosvg { };
Expand Down Expand Up @@ -1961,7 +1966,7 @@ in {

cupy = callPackage ../development/python-modules/cupy {
cudatoolkit = pkgs.cudatoolkit_11;
cudnn = pkgs.cudnn_8_1_cudatoolkit_11;
cudnn = pkgs.cudnn_8_3_cudatoolkit_11;
nccl = pkgs.nccl_cudatoolkit_11;
cutensor = pkgs.cutensor_cudatoolkit_11;
};
Expand Down Expand Up @@ -8362,6 +8367,24 @@ in {

pytorch = callPackage ../development/python-modules/pytorch {
cudaSupport = pkgs.config.cudaSupport or false;

# TODO: next time pytorch is updated (to 1.11.0, currently in staging as of
# 2022-03-31), make the following changes:

# -> cudatoolkit_11
cudatoolkit = pkgs.cudatoolkit_10;

# -> cudnn_8_3_cudatoolkit_11
cudnn = pkgs.cudnn_8_1_cudatoolkit_10;

# -> cutensor_cudatoolkit_11 (cutensor is a new dependency in v1.11.0)
# cutensor = pkgs.cutensor_cudatoolkit_11;

# -> setting a custom magma should be unnecessary with v1.11.0
magma = pkgs.magma.override { cudatoolkit = pkgs.cudatoolkit_10; };

# -> nccl_cudatoolkit_11
nccl = pkgs.nccl.override { cudatoolkit = pkgs.cudatoolkit_10; };
};

pytorch-bin = callPackage ../development/python-modules/pytorch/bin.nix { };
Expand Down