Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkgs/applications/science/math/caffe/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ stdenv.mkDerivation rec {
|| cudaSupport
|| !(leveldbSupport -> (leveldb != null && snappy != null))
|| !(cudnnSupport -> (hasCudnn && cudaSupport))
|| !(ncclSupport -> cudaSupport)
|| !(ncclSupport -> (cudaSupport && !nccl.meta.unsupported))
|| !(pythonSupport -> (python != null && numpy != null))
;
license = licenses.bsd2;
Expand Down
2 changes: 1 addition & 1 deletion pkgs/development/cuda-modules/cuda/overrides.nix
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) {
env.autoPatchelfIgnoreMissingDeps =
prevAttrs.env.autoPatchelfIgnoreMissingDeps + " libnvrm_gpu.so libnvrm_mem.so libnvdla_runtime.so";
# `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices.
brokenConditions = prevAttrs.brokenConditions // {
badPlatformsConditions = prevAttrs.badPlatformsConditions // {
"Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" =
!final.flags.isJetsonBuild;
};
Expand Down
18 changes: 13 additions & 5 deletions pkgs/development/cuda-modules/cudnn/shims.nix
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
{package, redistArch}:
{
featureRelease.${redistArch}.outputs = {
lib = true;
static = true;
dev = true;
lib,
package,
# redistArch :: String
# String is "unsupported" if the given architecture is unsupported.
redistArch,
}:
{
featureRelease = lib.optionalAttrs (redistArch != "unsupported") {
${redistArch}.outputs = {
lib = true;
static = true;
dev = true;
};
};
redistribRelease = {
name = "NVIDIA CUDA Deep Neural Network library (cuDNN)";
Expand Down
1 change: 1 addition & 0 deletions pkgs/development/cuda-modules/cutensor/extension.nix
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ let
# A release is supported if it has a libPath that matches our CUDA version for our platform.
# LibPath are not constant across the same release -- one platform may support fewer
# CUDA versions than another.
# redistArch :: String
redistArch = flags.getRedistArch hostPlatform.system;
# platformIsSupported :: Manifests -> Boolean
platformIsSupported =
Expand Down
50 changes: 20 additions & 30 deletions pkgs/development/cuda-modules/flags.nix
Original file line number Diff line number Diff line change
Expand Up @@ -131,39 +131,29 @@ let
# `linux-aarch64` redist (which is for Jetson devices) if we're building for any Jetson devices.
# Since both are based on aarch64, we can only have one or the other, otherwise there's an
# ambiguity as to which should be used.
# NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of
# `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported
# systems gracefully.
# getRedistArch :: String -> String
getRedistArch =
nixSystem:
if nixSystem == "aarch64-linux" then
if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa"
else if nixSystem == "x86_64-linux" then
"linux-x86_64"
else if nixSystem == "ppc64le-linux" then
"linux-ppc64le"
else if nixSystem == "x86_64-windows" then
"windows-x86_64"
else
"unsupported";
getRedistArch = nixSystem: attrsets.attrByPath [ nixSystem ] "unsupported" {
aarch64-linux = if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa";
x86_64-linux = "linux-x86_64";
ppc64le-linux = "linux-ppc64le";
x86_64-windows = "windows-x86_64";
};

# Maps NVIDIA redist arch to Nix system.
# It is imperative that we include the boolean condition based on jetsonTargets to ensure
# we don't advertise availability of packages only available on server-grade ARM
# as being available for the Jetson, since both `linux-sbsa` and `linux-aarch64` are
# mapped to the Nix system `aarch64-linux`.
getNixSystem =
redistArch:
if redistArch == "linux-sbsa" && jetsonTargets == [] then
"aarch64-linux"
else if redistArch == "linux-aarch64" && jetsonTargets != [] then
"aarch64-linux"
else if redistArch == "linux-x86_64" then
"x86_64-linux"
else if redistArch == "linux-ppc64le" then
"ppc64le-linux"
else if redistArch == "windows-x86_64" then
"x86_64-windows"
else
"unsupported-${redistArch}";
# NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of
# `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported
# systems gracefully.
# getNixSystem :: String -> String
getNixSystem = redistArch: attrsets.attrByPath [ redistArch ] "unsupported-${redistArch}" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, I introduced the stringly-typed string because I was in a rush. We should probably return some sort of Result/{ value, error } (e.g. in the format of tryEval).

I'll add a ticket to the project board for that

linux-sbsa = "aarch64-linux";
linux-aarch64 = "aarch64-linux";
linux-x86_64 = "x86_64-linux";
linux-ppc64le = "ppc64le-linux";
windows-x86_64 = "x86_64-windows";
};

formatCapabilities =
{
Expand Down
75 changes: 50 additions & 25 deletions pkgs/development/cuda-modules/generic-builders/manifest.nix
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ let
# Get the redist architectures for which package provides distributables.
# These are used by meta.platforms.
supportedRedistArchs = builtins.attrNames featureRelease;
# redistArch :: String
# The redistArch is the name of the architecture for which the redistributable is built.
# It is `"unsupported"` if the redistributable is not supported on the target platform.
redistArch = flags.getRedistArch hostPlatform.system;
in
backendStdenv.mkDerivation (
Expand Down Expand Up @@ -86,8 +89,18 @@ backendStdenv.mkDerivation (
"sample"
"python"
];
# Filter out outputs that don't exist in the redistributable.
# NOTE: In the case the redistributable isn't supported on the target platform,
# we will have `outputs = [ "out" ] ++ possibleOutputs`. This is of note because platforms which
# aren't supported would otherwise have evaluation errors when trying to access outputs other than `out`.
# The alternative would be to have `outputs = [ "out" ]` when `redistArch = "unsupported"`, but that would
# require adding guards throughout the entirety of the CUDA package set to ensure `cudaSupport` is true --
# recall that OfBorg will evaluate packages marked as broken and that `cudaPackages` will be evaluated with
# `cudaSupport = false`!
additionalOutputs =
if redistArch == "unsupported" then possibleOutputs else builtins.filter hasOutput possibleOutputs;
if redistArch == "unsupported"
then possibleOutputs
else builtins.filter hasOutput possibleOutputs;
# The out output is special -- it's the default output and we always include it.
outputs = [ "out" ] ++ additionalOutputs;
in
Expand All @@ -111,21 +124,32 @@ backendStdenv.mkDerivation (
python = ["**/*.whl"];
};

# Useful for introspecting why something went wrong.
# Maps descriptions of why the derivation would be marked broken to
# booleans indicating whether that description is true.
brokenConditions = {};
# Useful for introspecting why something went wrong. Maps descriptions of why the derivation would be marked as
# broken or have badPlatforms include the current platform to booleans indicating whether that description is true.

src = fetchurl {
url =
if (builtins.hasAttr redistArch redistribRelease) then
"https://developer.download.nvidia.com/compute/${redistName}/redist/${
redistribRelease.${redistArch}.relative_path
}"
else
"cannot-construct-an-url-for-the-${redistArch}-platform";
sha256 = redistribRelease.${redistArch}.sha256 or lib.fakeHash;
};
# brokenConditions :: AttrSet Bool
# Sets `meta.broken = true` if any of the conditions are true.
# Example: Broken on a specific version of CUDA or when a dependency has a specific version.
brokenConditions = { };

# badPlatformsConditions :: AttrSet Bool
# Sets `meta.badPlatforms = meta.platforms` if any of the conditions are true.
# Example: Broken on a specific architecture when some condition is met (like targeting Jetson).
badPlatformsConditions = { };

# src :: Optional Derivation
src = trivial.pipe redistArch [
# If redistArch doesn't exist in redistribRelease, return null.
(redistArch: redistribRelease.${redistArch} or null)
# If the release is non-null, fetch the source; otherwise, return null.
(trivial.mapNullable (
{ relative_path, sha256, ... }:
fetchurl {
url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}";
inherit sha256;
}
))
];

postPatch = ''
if [[ -d pkg-config ]] ; then
Expand Down Expand Up @@ -284,17 +308,18 @@ backendStdenv.mkDerivation (
meta = {
description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}";
sourceProvenance = [sourceTypes.binaryNativeCode];
platforms =
lists.concatMap
(
redistArch:
let
nixSystem = flags.getNixSystem redistArch;
in
lists.optionals (!(strings.hasPrefix "unsupported-" nixSystem)) [ nixSystem ]
)
supportedRedistArchs;
broken = lists.any trivial.id (attrsets.attrValues finalAttrs.brokenConditions);
platforms = trivial.pipe supportedRedistArchs [
# Map each redist arch to the equivalent nix system, or to an "unsupported-"-prefixed string if there is no equivalent.
(builtins.map flags.getNixSystem)
# Filter out unsupported systems
(builtins.filter (nixSystem: !(strings.hasPrefix "unsupported-" nixSystem)))
];
badPlatforms =
let
isBadPlatform = lists.any trivial.id (attrsets.attrValues finalAttrs.badPlatformsConditions);
in
lists.optionals isBadPlatform finalAttrs.meta.platforms;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"huh! Why are platforms and badPlatforms the same?"

Is this less or more confusing than adding just the current platform?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's fine, personally.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup probably a good thing we get the same meta regardless of where we evaluate

license = licenses.unfree;
maintainers = teams.cuda.members;
# Force the use of the default, fat output by default (even though `dev` exists, which
Expand Down
18 changes: 6 additions & 12 deletions pkgs/development/cuda-modules/generic-builders/multiplex.nix
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# The featureRelease is used to populate meta.platforms (by way of looking at the attribute names)
# and to determine the outputs of the package.
# shimsFn :: {package, redistArch} -> AttrSet
shimsFn ? ({package, redistArch}: throw "shimsFn must be provided"),
shimsFn ? (throw "shimsFn must be provided"),
# fixupFn :: Path
# A path (or nix expression) to be evaluated with callPackage and then
# provided to the package's overrideAttrs function.
Expand All @@ -29,16 +29,8 @@
# - cudaVersion
# - mkVersionedPackageName
# - package
fixupFn ? (
{
final,
cudaVersion,
mkVersionedPackageName,
package,
...
}:
throw "fixupFn must be provided"
),
# - ...
fixupFn ? (throw "fixupFn must be provided"),
}:
let
inherit (lib)
Expand Down Expand Up @@ -80,9 +72,11 @@ let
&& strings.versionAtLeast package.maxCudaVersion cudaVersion;

# Get all of the packages for our given platform.
# redistArch :: String
# Value is `"unsupported"` if the platform is not supported.
redistArch = flags.getRedistArch hostPlatform.system;

allReleases = builtins.concatMap (xs: xs) (builtins.attrValues releaseSets);
allReleases = lists.flatten (builtins.attrValues releaseSets);

# All the supported packages we can build for our platform.
# perSystemReleases :: List Package
Expand Down
3 changes: 3 additions & 0 deletions pkgs/development/cuda-modules/nccl/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ backendStdenv.mkDerivation (
homepage = "https://developer.nvidia.com/nccl";
license = licenses.bsd3;
platforms = platforms.linux;
# NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication.
# https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9
badPlatforms = lib.optionals cudaFlags.isJetsonBuild [ "aarch64-linux" ];
maintainers =
with maintainers;
[
Expand Down
15 changes: 7 additions & 8 deletions pkgs/development/cuda-modules/tensorrt/fixup.nix
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,17 @@
}:
let
inherit (lib)
attrsets
maintainers
meta
strings
versions
;
targetArch =
if hostPlatform.isx86_64 then
"x86_64-linux-gnu"
else if hostPlatform.isAarch64 then
"aarch64-linux-gnu"
else
"unsupported";
# targetArch :: String
targetArch = attrsets.attrByPath [ hostPlatform.system ] "unsupported" {
x86_64-linux = "x86_64-linux-gnu";
aarch64-linux = "aarch64-linux-gnu";
};
in
finalAttrs: prevAttrs: {
# Useful for inspecting why something went wrong.
Expand Down Expand Up @@ -69,7 +68,7 @@ finalAttrs: prevAttrs: {

preInstall =
(prevAttrs.preInstall or "")
+ ''
+ strings.optionalString (targetArch != "unsupported") ''
# Replace symlinks to bin and lib with the actual directories from targets.
for dir in bin lib; do
rm "$dir"
Expand Down
24 changes: 16 additions & 8 deletions pkgs/development/cuda-modules/tensorrt/shims.nix
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
{package, redistArch}:
{
featureRelease.${redistArch}.outputs = {
bin = true;
lib = true;
static = true;
dev = true;
sample = true;
python = true;
lib,
package,
# redistArch :: String
# String is `"unsupported"` if the given architecture is unsupported.
redistArch,
}:
{
featureRelease = lib.optionalAttrs (redistArch != "unsupported") {
${redistArch}.outputs = {
bin = true;
lib = true;
static = true;
dev = true;
sample = true;
python = true;
};
};
redistribRelease = {
name = "TensorRT: a high-performance deep learning interface";
Expand Down
2 changes: 1 addition & 1 deletion pkgs/development/libraries/science/math/magma/generic.nix
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ stdenv.mkDerivation {
description = "Matrix Algebra on GPU and Multicore Architectures";
license = licenses.bsd3;
homepage = "http://icl.cs.utk.edu/magma/index.html";
platforms = platforms.unix;
platforms = platforms.linux;
maintainers = with maintainers; [ connorbaker ];

# Cf. https://bitbucket.org/icl/magma/src/fcfe5aa61c1a4c664b36a73ebabbdbab82765e9f/CMakeLists.txt#lines-20
Expand Down
2 changes: 1 addition & 1 deletion pkgs/development/libraries/xgboost/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
, rPackages
}@inputs:

assert ncclSupport -> cudaSupport;
assert ncclSupport -> (cudaSupport && !cudaPackages.nccl.meta.unsupported);
# Disable regular tests when building the R package
# because 1) the R package runs its own tests and
# 2) the R package creates a different binary shared
Expand Down
3 changes: 2 additions & 1 deletion pkgs/development/python-modules/jaxlib/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ let
# aarch64-darwin is broken because of https://github.com/bazelbuild/rules_cc/pull/136
# however even with that fix applied, it doesn't work for everyone:
# https://github.com/NixOS/nixpkgs/pull/184395#issuecomment-1207287129
broken = stdenv.isDarwin;
# NOTE: We always build with NCCL; if it is unsupported, then our build is broken.
broken = stdenv.isDarwin || nccl.meta.unsupported;
};

cudatoolkit_joined = symlinkJoin {
Expand Down
13 changes: 8 additions & 5 deletions pkgs/development/python-modules/torch/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
magma,
magma-hip,
magma-cuda-static,
useSystemNccl ? true,
# Use the system NCCL as long as we're targeting CUDA on a supported platform.
useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported),
MPISupport ? false, mpi,
buildDocs ? false,

Expand Down Expand Up @@ -273,9 +274,11 @@ in buildPythonPackage rec {
PYTORCH_BUILD_VERSION = version;
PYTORCH_BUILD_NUMBER = 0;

USE_NCCL = setBool (cudaSupport && cudaPackages ? nccl);
USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
USE_STATIC_NCCL = setBool useSystemNccl;
# In-tree builds of NCCL are not supported.
# Use NCCL when cudaSupport is enabled and nccl is available.
USE_NCCL = setBool useSystemNccl;
USE_SYSTEM_NCCL = USE_NCCL;
USE_STATIC_NCCL = USE_NCCL;

# Suppress a weird warning in mkl-dnn, part of ideep in pytorch
# (upstream seems to have fixed this in the wrong place?)
Expand Down Expand Up @@ -363,7 +366,7 @@ in buildPythonPackage rec {
] ++ lists.optionals (cudaPackages ? cudnn) [
cudnn.dev
cudnn.lib
] ++ lists.optionals (useSystemNccl && cudaPackages ? nccl) [
] ++ lists.optionals useSystemNccl [
# Some platforms do not support NCCL (i.e., Jetson)
nccl.dev # Provides nccl.h AND a static copy of NCCL!
] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
Expand Down
Loading