From 1528efebb0c4cf14db5fd62514623196b6043b24 Mon Sep 17 00:00:00 2001 From: "Else, Someone" Date: Thu, 30 Oct 2025 04:43:14 +0200 Subject: [PATCH 1/2] check-meta: allow `problems`, use in remediation While waiting for a more comprehensive RFC 127 "Problems" implementation (#177272), proactively allow packages to specify `meta.problems` and use `problems` of kinds `unsupported` and `broken` to display context-aware remediation messages. This change is motivated by the merging of the CUDA13 PR, which included denying in-tree support for using CUDA without enabling it Nixpkgs-wide. Implementation-wise, unsupported packages were marked as `broken` (TBD: make "unsupported"), and reasons are visible with `--trace-verbose`, but obscured by the long and unhelpful NIXPKGS_ALLOW_BROKEN message. Instead of reverting the CUDA13 changes, which are blocking for a number of updates in the SciComp ecosystem, it seems better to allow customization of remediation messages. In `cudaPackages` (and dependent `pythonXPackages`), we used to rely on an ad hoc `brokenConditions` passthru attribute. Instead of moving that to `meta`, rewrite them in the form compatible with the future RFC 127 implementation. 
--- pkgs/stdenv/generic/check-meta.nix | 67 ++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/pkgs/stdenv/generic/check-meta.nix b/pkgs/stdenv/generic/check-meta.nix index c59e92e114ea1..c8c58cc99ec79 100644 --- a/pkgs/stdenv/generic/check-meta.nix +++ b/pkgs/stdenv/generic/check-meta.nix @@ -15,6 +15,7 @@ let concatMapStrings concatMapStringsSep concatStrings + concatStringsSep findFirst isDerivation length @@ -197,10 +198,20 @@ let pos_str = meta: meta.position or "«unknown-file»"; remediation = { - unfree = remediate_allowlist "Unfree" (remediate_predicate "allowUnfreePredicate"); - non-source = remediate_allowlist "NonSource" (remediate_predicate "allowNonSourcePredicate"); - broken = remediate_allowlist "Broken" (x: ""); - unsupported = remediate_allowlist "UnsupportedSystem" (x: ""); + unfree = + remediate_allowlist + # allow_attr, then the `meta.problems` kind whose messages replace the generic advice + "Unfree" + "unfree" + (remediate_predicate "allowUnfreePredicate"); + non-source = + remediate_allowlist + # + "NonSource" + "non-source" + (remediate_predicate "allowNonSourcePredicate"); + broken = remediate_allowlist "Broken" "broken" (x: ""); + unsupported = remediate_allowlist "UnsupportedSystem" "unsupported" (x: ""); blocklisted = x: ""; insecure = remediate_insecure; broken-outputs = remediateOutputsToInstall; @@ -240,20 +251,34 @@ let then pass `--impure` in order to allow use of environment variables. "; - remediate_allowlist = allow_attr: rebuild_amendment: attrs: '' - a) To temporarily allow ${remediation_phrase allow_attr}, you can use an environment variable - for a single invocation of the nix tools. - - $ export ${remediation_env_var allow_attr}=1 - ${flakeNote} - b) For `nixos-rebuild` you can set - { nixpkgs.config.allow${allow_attr} = true; } - in configuration.nix to override this. - ${rebuild_amendment attrs} - c) For `nix-env`, `nix-build`, `nix-shell` or any other Nix command you can add - { allow${allow_attr} = true; } - to ~/.config/nixpkgs/config.nix. 
- ''; + remediate_allowlist = + allow_attr: reason_attr: rebuild_amendment: attrs: + let + reasons = builtins.concatMap ( + { kind, message, ... }: optional (kind == reason_attr) message + ) attrs.meta.problems or [ ]; + hasReasons = reasons != [ ]; + in + optionalString hasReasons '' + + Known problems: + + '' + + concatStringsSep "\n" reasons + + optionalString (!hasReasons) '' + a) To temporarily allow ${remediation_phrase allow_attr}, you can use an environment variable + for a single invocation of the nix tools. + + $ export ${remediation_env_var allow_attr}=1 + ${flakeNote} + b) For `nixos-rebuild` you can set + { nixpkgs.config.allow${allow_attr} = true; } + in configuration.nix to override this. + ${rebuild_amendment attrs} + c) For `nix-env`, `nix-build`, `nix-shell` or any other Nix command you can add + { allow${allow_attr} = true; } + to ~/.config/nixpkgs/config.nix. + ''; remediate_insecure = attrs: @@ -362,6 +387,7 @@ let any listOf bool + submodule ; platforms = listOf (union [ str @@ -406,6 +432,11 @@ let unfree = bool; unsupported = bool; insecure = bool; + + # For actual schema cf. + # https://github.com/NixOS/rfcs/blob/master/rfcs/0127-issues-warnings.md#package-problems + problems = listOf (attrsOf any); + tests = { name = "test"; verify = From c24aef66d0d13cd950e74997eff95aa2b2f64b98 Mon Sep 17 00:00:00 2001 From: "Else, Someone" Date: Thu, 30 Oct 2025 06:06:06 +0200 Subject: [PATCH 2/2] =?UTF-8?q?treewide:=20ad=20hoc=20brokenConditions=20?= =?UTF-8?q?=E2=86=92=20meta.problems?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A couple Python/SciComp and CUDA packages previously relied on making `meta.broken` and `meta.unsupported` conditions more reliable by listing the reasons in `passthru.brokenConditions` (&c), used in conjunction with `addErrorContext` or ad hoc `assert`. Now that the need is more urgent (Cf. 
25.11 config.cudaSupport requirement), use an RFC127-compatible scheme to list problems and display meaningful errors. cudaPackages: moved assertions from `backendStdenv` (keep it a tiny shim, hopefully eventually remove) to `cuda_nvcc`. --- doc/languages-frameworks/cuda.section.md | 6 +- .../cuda-modules/_cuda/lib/assertions.nix | 139 --------------- .../cuda-modules/_cuda/lib/default.nix | 10 +- .../cuda-modules/_cuda/lib/meta.nix | 85 +++------ .../cuda-modules/buildRedist/default.nix | 164 +++++++++--------- .../cuda-modules/packages/backendStdenv.nix | 140 +++------------ .../cuda-modules/packages/cuda_nvcc.nix | 105 +++++++++++ .../cuda-modules/packages/libnvshmem.nix | 2 +- .../cuda-modules/packages/nccl-tests.nix | 4 +- .../packages/tests/redists-installed.nix | 2 +- .../python-modules/bitsandbytes/default.nix | 29 +++- .../python-modules/torch/source/default.nix | 63 +++++-- 12 files changed, 311 insertions(+), 438 deletions(-) delete mode 100644 pkgs/development/cuda-modules/_cuda/lib/assertions.nix diff --git a/doc/languages-frameworks/cuda.section.md b/doc/languages-frameworks/cuda.section.md index 01b70402cf482..b767cf13b17cb 100644 --- a/doc/languages-frameworks/cuda.section.md +++ b/doc/languages-frameworks/cuda.section.md @@ -286,9 +286,9 @@ Whenever a new version of a redistributable manifest is made available: Updating package expressions amounts to: -- adding fixes conditioned on newer releases, like added or removed dependencies -- adding package expressions for new packages -- updating `passthru.brokenConditions` and `passthru.badPlatformsConditions` with various constraints, (e.g., new releases removing support for various architectures) +- adding fixes conditioned on newer releases, like added or removed dependencies, +- adding package expressions for new packages, +- updating `meta.problems`. 
#### Updating supported compilers and GPUs {#cuda-updating-supported-compilers-and-gpus} diff --git a/pkgs/development/cuda-modules/_cuda/lib/assertions.nix b/pkgs/development/cuda-modules/_cuda/lib/assertions.nix deleted file mode 100644 index f4413b28e4f30..0000000000000 --- a/pkgs/development/cuda-modules/_cuda/lib/assertions.nix +++ /dev/null @@ -1,139 +0,0 @@ -{ _cuda, lib }: -{ - /** - Evaluate assertions and add error context to return value. - - NOTE: No guarantees are made about this function's stability. You may use it at your own risk. - - # Type - - ``` - _evaluateAssertions - :: (assertions :: List { assertion :: Bool, message :: String }) - -> Bool - ``` - */ - _evaluateAssertions = - assertions: - let - failedAssertionsString = _cuda.lib._mkFailedAssertionsString assertions; - in - if failedAssertionsString == "" then - true - else - lib.addErrorContext "with failed assertions:${failedAssertionsString}" false; - - /** - Function to generate a string of failed assertions. - - NOTE: No guarantees are made about this function's stability. You may use it at your own risk. 
- - # Type - - ``` - _mkFailedAssertionsString - :: (assertions :: List { assertion :: Bool, message :: String }) - -> String - ``` - - # Inputs - - `assertions` - - : A list of assertions to evaluate - - # Examples - - :::{.example} - ## `_cuda.lib._mkFailedAssertionsString` usage examples - - ```nix - _mkFailedAssertionsString [ - { assertion = false; message = "Assertion 1 failed"; } - { assertion = true; message = "Assertion 2 failed"; } - ] - => "\n- Assertion 1 failed" - ``` - - ```nix - _mkFailedAssertionsString [ - { assertion = false; message = "Assertion 1 failed"; } - { assertion = false; message = "Assertion 2 failed"; } - ] - => "\n- Assertion 1 failed\n- Assertion 2 failed" - ``` - ::: - */ - _mkFailedAssertionsString = lib.foldl' ( - failedAssertionsString: - { assertion, message }: - failedAssertionsString + lib.optionalString (!assertion) ("\n- " + message) - ) ""; - - /** - Utility function to generate assertions for missing packages. - - Used to mark a package as unsupported if any of its required packages are missing (null). - - Expects a set of attributes. - - Most commonly used in overrides files on a callPackage-provided attribute set of packages. - - NOTE: We typically use platfromAssertions instead of brokenAssertions because the presence of packages set to null - means evaluation will fail if package attributes are accessed without checking for null first. OfBorg evaluation - sets allowBroken to true, which means we can't rely on brokenAssertions to prevent evaluation of a package with - missing dependencies. - - NOTE: No guarantees are made about this function's stability. You may use it at your own risk. - - # Type - - ``` - _mkMissingPackagesAssertions - :: (attrs :: AttrSet) - -> (assertions :: List { assertion :: Bool, message :: String }) - ``` - - # Inputs - - `attrs` - - : The attributes to check for null - - # Examples - - :::{.example} - ## `_cuda.lib._mkMissingPackagesAssertions` usage examples - - ```nix - { - lib, - libcal ? 
null, - libcublas, - utils, - }: - let - inherit (lib.attrsets) recursiveUpdate; - inherit (_cuda.lib) _mkMissingPackagesAssertions; - in - prevAttrs: { - passthru = prevAttrs.passthru or { } // { - platformAssertions = - prevAttrs.passthru.platformAssertions or [ ] - ++ _mkMissingPackagesAssertions { inherit libcal; }; - }; - } - ``` - ::: - */ - _mkMissingPackagesAssertions = lib.flip lib.pipe [ - # Take the attributes that are null. - (lib.filterAttrs (_: value: value == null)) - lib.attrNames - # Map them to assertions. - (lib.map (name: { - message = "${name} is available"; - assertion = false; - })) - ]; -} diff --git a/pkgs/development/cuda-modules/_cuda/lib/default.nix b/pkgs/development/cuda-modules/_cuda/lib/default.nix index 376abcd27f7bd..1404f5a3629d4 100644 --- a/pkgs/development/cuda-modules/_cuda/lib/default.nix +++ b/pkgs/development/cuda-modules/_cuda/lib/default.nix @@ -3,13 +3,6 @@ lib, }: { - # See ./assertions.nix for documentation. - inherit (import ./assertions.nix { inherit _cuda lib; }) - _evaluateAssertions - _mkFailedAssertionsString - _mkMissingPackagesAssertions - ; - # See ./cuda.nix for documentation. inherit (import ./cuda.nix { inherit _cuda lib; }) _cudaCapabilityIsDefault @@ -23,8 +16,9 @@ # See ./meta.nix for documentation. inherit (import ./meta.nix { inherit _cuda lib; }) + _hasProblemKind + _mkMetaProblems _mkMetaBadPlatforms - _mkMetaBroken ; # See ./redist.nix for documentation. diff --git a/pkgs/development/cuda-modules/_cuda/lib/meta.nix b/pkgs/development/cuda-modules/_cuda/lib/meta.nix index 7adc36474935d..002a7557c79f2 100644 --- a/pkgs/development/cuda-modules/_cuda/lib/meta.nix +++ b/pkgs/development/cuda-modules/_cuda/lib/meta.nix @@ -1,31 +1,29 @@ { _cuda, lib }: { - /** - Returns a list of bad platforms for a given package if assertsions in `finalAttrs.passthru.platformAssertions` - fail, optionally logging evaluation warnings with `builtins.traceVerbose` for each reason. 
- - NOTE: No guarantees are made about this function's stability. You may use it at your own risk. - - NOTE: This function requires `finalAttrs.passthru.platformAssertions` to be a list of assertions and - `finalAttrs.finalPackage.name` and `finalAttrs.finalPackage.stdenv` to be available. - - # Type - - ``` - _mkMetaBadPlatforms :: (finalAttrs :: AttrSet) -> List String - ``` - - # Inputs - - `finalAttrs` - - : The final attributes of the package - */ + # _mkMetaProblems ∷ [{ assertion ∷ bool, kind, message, urls ? [ ], ... }] → [{ kind, message, urls }] (keeps only entries whose assertion is false) + _mkMetaProblems = builtins.concatMap ( + { + assertion, + kind, + message, + urls ? [ ], + ... + }@problem: + lib.lists.optional (!assertion) { inherit kind message urls; } + ); + + # _hasProblemKind ∷ kind → { meta, ... } → bool (true if any entry of `meta.problems`, default [ ], has that kind) + _hasProblemKind = + kind': finalAttrs: builtins.any ({ kind, ... }: kind == kind') finalAttrs.meta.problems or [ ]; + + # _mkMetaBadPlatforms ∷ { meta, ... } → [str] + # + # A helper for generating a short list of `badPlatforms` to be displayed in `errormsg` by `check-meta.nix`, + # when the real requirements of a package are more complex and dynamic than a matching CPU architecture. 
_mkMetaBadPlatforms = finalAttrs: let - failedAssertionsString = _cuda.lib._mkFailedAssertionsString finalAttrs.passthru.platformAssertions; - hasFailedAssertions = failedAssertionsString != ""; + hasFailedAssertions = _cuda.lib._hasProblemKind "unsupported" finalAttrs; finalStdenv = finalAttrs.finalPackage.stdenv; badPlatforms = lib.optionals hasFailedAssertions ( lib.unique [ @@ -34,45 +32,6 @@ finalStdenv.targetPlatform.system ] ); - handle = - if hasFailedAssertions then - builtins.traceVerbose "Package ${finalAttrs.finalPackage.name} is unsupported on this platform due to the following failed assertions:${failedAssertionsString}" - else - lib.id; - in - handle badPlatforms; - - /** - Returns a boolean indicating whether the package is broken as a result of `finalAttrs.passthru.brokenAssertions`, - optionally logging evaluation warnings with `builtins.traceVerbose` for each reason. - - NOTE: No guarantees are made about this function's stability. You may use it at your own risk. - - NOTE: This function requires `finalAttrs.passthru.brokenAssertions` to be a list of assertions and - `finalAttrs.finalPackage.name` to be available. 
- - # Type - - ``` - _mkMetaBroken :: (finalAttrs :: AttrSet) -> Bool - ``` - - # Inputs - - `finalAttrs` - - : The final attributes of the package - */ - _mkMetaBroken = - finalAttrs: - let - failedAssertionsString = _cuda.lib._mkFailedAssertionsString finalAttrs.passthru.brokenAssertions; - hasFailedAssertions = failedAssertionsString != ""; - handle = - if hasFailedAssertions then - builtins.traceVerbose "Package ${finalAttrs.finalPackage.name} is marked as broken due to the following failed assertions:${failedAssertionsString}" - else - lib.id; in - handle hasFailedAssertions; + badPlatforms; } diff --git a/pkgs/development/cuda-modules/buildRedist/default.nix b/pkgs/development/cuda-modules/buildRedist/default.nix index 0317d85c9b226..d98f065be31e0 100644 --- a/pkgs/development/cuda-modules/buildRedist/default.nix +++ b/pkgs/development/cuda-modules/buildRedist/default.nix @@ -36,7 +36,12 @@ let unique ; inherit (lib.trivial) mapNullable pipe; - inherit (_cuda.lib) _mkMetaBadPlatforms _mkMetaBroken _redistSystemIsSupported; + inherit (_cuda.lib) + _hasProblemKind + _mkMetaProblems + _mkMetaBadPlatforms + _redistSystemIsSupported + ; inherit (lib) licenses sourceTypes @@ -143,10 +148,6 @@ extendMkDerivation { passthru ? { }, meta ? { }, - # Misc - brokenAssertions ? [ ], - platformAssertions ? [ ], - # Order is important here so we use a list. expectedOutputs ? 
[ "out" @@ -343,14 +344,6 @@ extendMkDerivation { # NOTE: `release` may be null, so we must use `lib.defaultTo` or (getSupportedReleases (lib.defaultTo { } finalAttrs.passthru.release)); - supportedNixSystems = - passthru.supportedNixSystems or (pipe finalAttrs.passthru.supportedReleases [ - attrNames - (concatMap getNixSystems) - naturalSort - unique - ]); - supportedRedistSystems = passthru.supportedRedistSystems or (naturalSort (attrNames finalAttrs.passthru.supportedReleases)); @@ -367,70 +360,6 @@ extendMkDerivation { # Taken and modified from: # https://github.com/NixOS/nixpkgs/blob/fe5e11faed6241aacf7220436088789287507494/pkgs/build-support/setup-hooks/multiple-outputs.sh#L45-L62 inherit outputNameVarFallbacks; - - # brokenAssertions :: [Attrs] - # Used by mkMetaBroken to set `meta.broken`. - # Example: Broken on a specific version of CUDA or when a dependency has a specific version. - # NOTE: Do not use this when a broken assertion means evaluation will fail! For example, if - # a package is missing and is required for the build -- that should go in platformAssertions, - # because attempts to access attributes on the package will cause evaluation errors. - brokenAssertions = [ - { - message = "CUDA support is enabled by config.cudaSupport"; - assertion = config.cudaSupport; - } - { - message = "lib output precedes static output"; - assertion = - let - libIndex = findFirstIndex (x: x == "lib") null finalAttrs.outputs; - staticIndex = findFirstIndex (x: x == "static") null finalAttrs.outputs; - in - libIndex == null || staticIndex == null || libIndex < staticIndex; - } - { - # NOTE: We cannot (easily) check that all expected outputs have a corresponding outputNameVar attribute in - # finalAttrs because of the presence of attributes which use the "output" prefix but are not outputNameVars - # (e.g., outputChecks and outputName). 
- message = "outputNameVarFallbacks is a super set of expectedOutputs"; - assertion = - subtractLists (map mkOutputNameVar finalAttrs.passthru.expectedOutputs) ( - attrNames finalAttrs.passthru.outputNameVarFallbacks - ) == [ ]; - } - { - message = "outputToPatterns is a super set of expectedOutputs"; - assertion = - subtractLists finalAttrs.passthru.expectedOutputs (attrNames finalAttrs.passthru.outputToPatterns) - == [ ]; - } - { - message = "propagatedBuildOutputs is a subset of outputs"; - assertion = subtractLists finalAttrs.outputs finalAttrs.propagatedBuildOutputs == [ ]; - } - ] - ++ brokenAssertions; - - # platformAssertions :: [Attrs] - # Used by mkMetaBadPlatforms to set `meta.badPlatforms`. - # Example: Broken on a specific system when some condition is met, like targeting Jetson or - # a required package missing. - # NOTE: Use this when a failed assertion means evaluation can fail! - platformAssertions = - let - isSupportedRedistSystem = _redistSystemIsSupported hostRedistSystem finalAttrs.passthru.supportedRedistSystems; - in - [ - { - message = "src is null if and only if hostRedistSystem is unsupported"; - assertion = (finalAttrs.src == null) == !isSupportedRedistSystem; - } - { - message = "hostRedistSystem (${hostRedistSystem}) is supported (${builtins.toJSON finalAttrs.passthru.supportedRedistSystems})"; - assertion = isSupportedRedistSystem; - } - ] - ++ platformAssertions; }; meta = meta // { @@ -438,9 +367,6 @@ extendMkDerivation { By downloading and using this package you accept the terms and conditions of the associated license(s). 
''; sourceProvenance = meta.sourceProvenance or [ sourceTypes.binaryNativeCode ]; - platforms = finalAttrs.passthru.supportedNixSystems; - broken = _mkMetaBroken finalAttrs; - badPlatforms = _mkMetaBadPlatforms finalAttrs; downloadPage = meta.downloadPage or "https://developer.download.nvidia.com/compute/${finalAttrs.passthru.redistName}/redist/${finalAttrs.pname}"; @@ -457,6 +383,84 @@ extendMkDerivation { else [ licenses.nvidiaCuda ]; teams = meta.teams or [ ] ++ [ teams.cuda ]; + + broken = meta.broken or false || _hasProblemKind "broken" finalAttrs; + platforms = + meta.platforms or (pipe finalAttrs.passthru.supportedReleases [ + attrNames + (concatMap getNixSystems) + naturalSort + unique + ]); + badPlatforms = _mkMetaBadPlatforms finalAttrs; + + problems = + let + isSupportedRedistSystem = _redistSystemIsSupported hostRedistSystem finalAttrs.passthru.supportedRedistSystems; + in + meta.problems or [ ] + ++ _mkMetaProblems [ + { + kind = "unsupported"; + message = '' + CUDA without global `config.cudaSupport` is unsafe and unsupported. + Cf. NixOS 25.11 Release Notes. + + a) Use `import <nixpkgs> { config.cudaSupport = true; }`. + b) For `nixos-rebuild`, set + { nixpkgs.config.cudaSupport = true; } + in `configuration.nix`. + c) For `nix-env`, `nix-build`, `nix-shell` or any other Nix command you can add + { cudaSupport = true; } + to ~/.config/nixpkgs/config.nix. 
+ ''; + assertion = config.cudaSupport; + } + { + kind = "unsupported"; + message = "src is null if and only if hostRedistSystem is unsupported"; + assertion = (finalAttrs.src == null) == !isSupportedRedistSystem; + } + { + kind = "unsupported"; + message = "hostRedistSystem (${hostRedistSystem}) is supported (${builtins.toJSON finalAttrs.passthru.supportedRedistSystems})"; + assertion = isSupportedRedistSystem; + } + + { + kind = "broken"; + message = "lib output precedes static output"; + assertion = + let + libIndex = findFirstIndex (x: x == "lib") null finalAttrs.outputs; + staticIndex = findFirstIndex (x: x == "static") null finalAttrs.outputs; + in + libIndex == null || staticIndex == null || libIndex < staticIndex; + } + { + kind = "broken"; + # NOTE: We cannot (easily) check that all expected outputs have a corresponding outputNameVar attribute in + # finalAttrs because of the presence of attributes which use the "output" prefix but are not outputNameVars + # (e.g., outputChecks and outputName). + message = "outputNameVarFallbacks is a super set of expectedOutputs"; + assertion = + subtractLists (map mkOutputNameVar finalAttrs.passthru.expectedOutputs) ( + attrNames finalAttrs.passthru.outputNameVarFallbacks + ) == [ ]; + } + { + kind = "broken"; + message = "outputToPatterns is a super set of expectedOutputs"; + assertion = + subtractLists finalAttrs.passthru.expectedOutputs (attrNames finalAttrs.passthru.outputToPatterns) + == [ ]; + } + { + kind = "broken"; + message = "propagatedBuildOutputs is a subset of outputs"; + assertion = subtractLists finalAttrs.outputs finalAttrs.propagatedBuildOutputs == [ ]; + } + ]; }; } # Setup the outputNameVar variables to gracefully handle missing outputs. 
diff --git a/pkgs/development/cuda-modules/packages/backendStdenv.nix b/pkgs/development/cuda-modules/packages/backendStdenv.nix index c7b0dbd549a58..ec298d410aed5 100644 --- a/pkgs/development/cuda-modules/packages/backendStdenv.nix +++ b/pkgs/development/cuda-modules/packages/backendStdenv.nix @@ -24,7 +24,7 @@ let inherit (_cuda.lib) _cudaCapabilityIsDefault _cudaCapabilityIsSupported - _mkFailedAssertionsString + _mkMetaProblems getRedistSystem mkVersionedName ; @@ -45,25 +45,11 @@ let ; inherit (lib.versions) major; - # NOTE: By virtue of processing a sorted list (allSortedCudaCapabilities), our groups will be sorted. - - architectureSpecificCudaCapabilities = filter ( - cudaCapability: cudaCapabilityToInfo.${cudaCapability}.isArchitectureSpecific - ) allSortedCudaCapabilities; - - familySpecificCudaCapabilities = filter ( - cudaCapability: cudaCapabilityToInfo.${cudaCapability}.isFamilySpecific - ) allSortedCudaCapabilities; - - jetsonCudaCapabilities = filter ( - cudaCapability: cudaCapabilityToInfo.${cudaCapability}.isJetson - ) allSortedCudaCapabilities; - passthruExtra = { - nvccHostCCMatchesStdenvCC = backendStdenv.cc == stdenv.cc; - # TODO(@connorbaker): Does it make sense to expose the `stdenv` we were called with and the `stdenv` selected - # prior to using `stdenvAdapters.useLibsFrom`? + # NOTE: All of these attributes are internal details subject to removal! + + supportedByNvcc = maybeBackendStdenv.valid; # The Nix system of the host platform. hostNixSystem = stdenv.hostPlatform.system; @@ -100,96 +86,19 @@ let else passthruExtra.defaultCudaCapabilities; - # Requested architecture-specific CUDA capabilities. - requestedArchitectureSpecificCudaCapabilities = intersectLists architectureSpecificCudaCapabilities passthruExtra.cudaCapabilities; - - # Whether the requested CUDA capabilities include architecture-specific CUDA capabilities. 
- hasArchitectureSpecificCudaCapability = - passthruExtra.requestedArchitectureSpecificCudaCapabilities != [ ]; - - # Requested family-specific CUDA capabilities. - requestedFamilySpecificCudaCapabilities = intersectLists familySpecificCudaCapabilities passthruExtra.cudaCapabilities; - - # Whether the requested CUDA capabilities include family-specific CUDA capabilities. - hasFamilySpecificCudaCapability = passthruExtra.requestedFamilySpecificCudaCapabilities != [ ]; + jetsonCudaCapabilities = filter ( + cudaCapability: cudaCapabilityToInfo.${cudaCapability}.isJetson + ) allSortedCudaCapabilities; # Requested Jetson CUDA capabilities. - requestedJetsonCudaCapabilities = intersectLists jetsonCudaCapabilities passthruExtra.cudaCapabilities; + requestedJetsonCudaCapabilities = intersectLists passthruExtra.jetsonCudaCapabilities passthruExtra.cudaCapabilities; # Whether the requested CUDA capabilities include Jetson CUDA capabilities. hasJetsonCudaCapability = passthruExtra.requestedJetsonCudaCapabilities != [ ]; - }; - - assertions = - let - # Jetson devices (pre-Thor) cannot be targeted by the same binaries which target non-Jetson devices. While - # NVIDIA provides both `linux-aarch64` and `linux-sbsa` packages, which both target `aarch64`, - # they are built with different settings and cannot be mixed. - preThorJetsonCudaCapabilities = filter (flip versionOlder "10.1") passthruExtra.requestedJetsonCudaCapabilities; - postThorJetsonCudaCapabilities = filter (flip versionAtLeast "10.1") passthruExtra.requestedJetsonCudaCapabilities; - # Remove all known capabilities from the user's list to find unrecognized capabilities. - unrecognizedCudaCapabilities = subtractLists allSortedCudaCapabilities passthruExtra.cudaCapabilities; - - # Capabilities which are too old for this CUDA version. - tooOldCudaCapabilities = filter ( - cap: - let - # This can be null! 
- maybeMax = cudaCapabilityToInfo.${cap}.maxCudaMajorMinorVersion; - in - maybeMax != null && lib.versionOlder maybeMax cudaMajorMinorVersion - ) passthruExtra.cudaCapabilities; - - # Capabilities which are too new for this CUDA version. - tooNewCudaCapabilities = filter ( - cap: lib.versionOlder cudaMajorMinorVersion cudaCapabilityToInfo.${cap}.minCudaMajorMinorVersion - ) passthruExtra.cudaCapabilities; - in - [ - { - message = "Requested unrecognized CUDA capabilities: ${toJSON unrecognizedCudaCapabilities}"; - assertion = unrecognizedCudaCapabilities == [ ]; - } - { - message = "Requested CUDA capabilities which are too old for CUDA ${cudaMajorMinorVersion}: ${toJSON tooOldCudaCapabilities}"; - assertion = tooOldCudaCapabilities == [ ]; - } - { - message = "Requested CUDA capabilities which are too new for CUDA ${cudaMajorMinorVersion}: ${toJSON tooNewCudaCapabilities}"; - assertion = tooNewCudaCapabilities == [ ]; - } - { - message = - "Requested Jetson CUDA capabilities (${toJSON passthruExtra.requestedJetsonCudaCapabilities}) require " - + "hostPlatform (${passthruExtra.hostNixSystem}) to be aarch64-linux"; - assertion = passthruExtra.hasJetsonCudaCapability -> passthruExtra.hostNixSystem == "aarch64-linux"; - } - { - message = - "Requested pre-Thor (10.1) Jetson CUDA capabilities (${toJSON preThorJetsonCudaCapabilities}) cannot be " - + "specified with other capabilities (${toJSON (subtractLists preThorJetsonCudaCapabilities passthruExtra.cudaCapabilities)})"; - assertion = - # If there are preThorJetsonCudaCapabilities, they must be the only requested capabilities. 
- preThorJetsonCudaCapabilities != [ ] - -> preThorJetsonCudaCapabilities == passthruExtra.cudaCapabilities; - } - { - message = - "Requested pre-Thor (10.1) Jetson CUDA capabilities (${toJSON preThorJetsonCudaCapabilities}) require " - + "computed NVIDIA hostRedistSystem (${passthruExtra.hostRedistSystem}) to be linux-aarch64"; - assertion = - preThorJetsonCudaCapabilities != [ ] -> passthruExtra.hostRedistSystem == "linux-aarch64"; - } - { - message = - "Requested post-Thor (10.1) Jetson CUDA capabilities (${toJSON postThorJetsonCudaCapabilities}) require " - + "computed NVIDIA hostRedistSystem (${passthruExtra.hostRedistSystem}) to be linux-sbsa"; - assertion = postThorJetsonCudaCapabilities != [ ] -> passthruExtra.hostRedistSystem == "linux-sbsa"; - } - ]; - - failedAssertionsString = _mkFailedAssertionsString assertions; + # Whether the requested CUDA capabilities include family-specific CUDA capabilities (unknown capabilities count as not family-specific). + hasFamilySpecificCudaCapability = builtins.any (cap: cudaCapabilityToInfo.${cap}.isFamilySpecific or false) passthruExtra.cudaCapabilities; + }; # TODO(@connorbaker): Seems like `stdenvAdapters.useLibsFrom` breaks clangStdenv's ability to find header files. # To reproduce: use `nix shell .#cudaPackages_12_6.backendClangStdenv.cc` since CUDA 12.6 supports at most Clang @@ -217,7 +126,7 @@ let # ``` # TODO(@connorbaker): Seems like even using unmodified `clangStdenv` causes issues -- saxpy fails to build CMake # errors during CUDA compiler identification about invalid redefinitions of things like `realpath`. - backendStdenv = + maybeBackendStdenv = let hostCCName = if stdenv.cc.isGNU then @@ -252,20 +161,13 @@ let (findFirst (x: x != null) null) ]; in - # If the current stdenv's compiler version is compatible, or we're on an unsupported host system, use stdenv - # directly. - # If we're on an unsupported host system (like darwin), there's not much else we can do, but we should not break - # evaluation on unsupported systems. 
- if stdenvIsSupportedVersion || passthruExtra.hostRedistSystem == "unsupported" then - stdenv - # Otherwise, try to find a compatible stdenv. - else - assert assertMsg (maybeHostStdenv != null) - "backendStdenv: no supported host compiler found (tried ${hostCCName} ${versions.minMajorVersion} to ${versions.maxMajorVersion})"; - stdenvAdapters.useLibsFrom stdenv maybeHostStdenv; + # The actual error messages are generated in cuda_nvcc based on backendStdenv.supportedByNvcc + rec { + valid = + # Valid when the host is supported and stdenv is usable as-is or a compatible replacement compiler was found. + passthruExtra.hostRedistSystem != "unsupported" + && (stdenvIsSupportedVersion || maybeHostStdenv != null); + value = if valid && !stdenvIsSupportedVersion then stdenvAdapters.useLibsFrom stdenv maybeHostStdenv else stdenv; + }; in -# TODO: Consider testing whether we in fact use the newer libstdc++ -# NOTE: The assertion message we get from `extendDerivation` is not at all helpful. Instead, we use assertMsg. -assert assertMsg (failedAssertionsString == "") - "${mkVersionedName "cudaPackages" cudaMajorMinorVersion}.backendStdenv has failed assertions:${failedAssertionsString}"; -extendDerivation true passthruExtra backendStdenv +extendDerivation true passthruExtra maybeBackendStdenv.value diff --git a/pkgs/development/cuda-modules/packages/cuda_nvcc.nix b/pkgs/development/cuda-modules/packages/cuda_nvcc.nix index 47906b5977c16..a501ae96e6859 100644 --- a/pkgs/development/cuda-modules/packages/cuda_nvcc.nix +++ b/pkgs/development/cuda-modules/packages/cuda_nvcc.nix @@ -4,6 +4,7 @@ buildRedist, cudaAtLeast, cudaOlder, + cudaMajorMinorVersion, cuda_cccl, lib, libnvvm, @@ -174,5 +175,109 @@ buildRedist (finalAttrs: { description = "CUDA compiler driver"; homepage = "https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc"; mainProgram = "nvcc"; + + # TODO: Consider testing whether we in fact use the newer libstdc++ + problems = + let + inherit (builtins) filter toJSON; + inherit (_cuda.db) allSortedCudaCapabilities cudaCapabilityToInfo; + inherit (lib.lists) intersectLists subtractLists; + inherit
(lib.strings) versionOlder versionAtLeast; + inherit (lib) flip; + + # NOTE: By virtue of processing a sorted list (allSortedCudaCapabilities), our groups will be sorted. + + architectureSpecificCudaCapabilities = filter ( + cudaCapability: cudaCapabilityToInfo.${cudaCapability}.isArchitectureSpecific + ) allSortedCudaCapabilities; + + jetsonCudaCapabilities = filter ( + cudaCapability: cudaCapabilityToInfo.${cudaCapability}.isJetson + ) allSortedCudaCapabilities; + + # Requested Jetson CUDA capabilities. + requestedJetsonCudaCapabilities = intersectLists jetsonCudaCapabilities backendStdenv.cudaCapabilities; + + # Whether the requested CUDA capabilities include Jetson CUDA capabilities. + hasJetsonCudaCapability = requestedJetsonCudaCapabilities != [ ]; + + # Jetson devices (pre-Thor) cannot be targeted by the same binaries which target non-Jetson devices. While + # NVIDIA provides both `linux-aarch64` and `linux-sbsa` packages, which both target `aarch64`, + # they are built with different settings and cannot be mixed. + preThorJetsonCudaCapabilities = filter (flip versionOlder "10.1") requestedJetsonCudaCapabilities; + postThorJetsonCudaCapabilities = filter (flip versionAtLeast "10.1") requestedJetsonCudaCapabilities; + + # Remove all known capabilities from the user's list to find unrecognized capabilities. + unrecognizedCudaCapabilities = subtractLists allSortedCudaCapabilities backendStdenv.cudaCapabilities; + + # Capabilities which are too old for this CUDA version. + tooOldCudaCapabilities = filter ( + cap: + let + # This can be null! + maybeMax = cudaCapabilityToInfo.${cap}.maxCudaMajorMinorVersion; + in + maybeMax != null && lib.versionOlder maybeMax cudaMajorMinorVersion + ) backendStdenv.cudaCapabilities; + + # Capabilities which are too new for this CUDA version. 
+ tooNewCudaCapabilities = filter ( + cap: lib.versionOlder cudaMajorMinorVersion cudaCapabilityToInfo.${cap}.minCudaMajorMinorVersion + ) backendStdenv.cudaCapabilities; + in + _cuda.lib._mkMetaProblems [ + { + kind = "unsupported"; + assertion = backendStdenv.supportedByNvcc; + message = "Couldn't find a host compiler compatible with NVCC."; + } + { + kind = "broken"; + message = "Requested unrecognized CUDA capabilities: ${toJSON unrecognizedCudaCapabilities}"; + assertion = unrecognizedCudaCapabilities == [ ]; + } + { + kind = "unsupported"; + message = "Requested CUDA capabilities which are too old for CUDA ${cudaMajorMinorVersion}: ${toJSON tooOldCudaCapabilities}"; + assertion = tooOldCudaCapabilities == [ ]; + } + { + kind = "unsupported"; + message = "Requested CUDA capabilities which are too new for CUDA ${cudaMajorMinorVersion}: ${toJSON tooNewCudaCapabilities}"; + assertion = tooNewCudaCapabilities == [ ]; + } + { + kind = "unsupported"; + message = + "Requested Jetson CUDA capabilities (${toJSON requestedJetsonCudaCapabilities}) require " + + "hostPlatform (${backendStdenv.hostNixSystem}) to be aarch64-linux"; + assertion = hasJetsonCudaCapability -> backendStdenv.hostNixSystem == "aarch64-linux"; + } + { + kind = "unsupported"; + message = + "Requested pre-Thor (10.1) Jetson CUDA capabilities (${toJSON preThorJetsonCudaCapabilities}) cannot be " + + "specified with other capabilities (${toJSON (subtractLists preThorJetsonCudaCapabilities backendStdenv.cudaCapabilities)})"; + assertion = + # If there are preThorJetsonCudaCapabilities, they must be the only requested capabilities. 
+ preThorJetsonCudaCapabilities != [ ] + -> preThorJetsonCudaCapabilities == backendStdenv.cudaCapabilities; + } + { + kind = "unsupported"; + message = + "Requested pre-Thor (10.1) Jetson CUDA capabilities (${toJSON preThorJetsonCudaCapabilities}) require " + + "computed NVIDIA hostRedistSystem (${backendStdenv.hostRedistSystem}) to be linux-aarch64"; + assertion = + preThorJetsonCudaCapabilities != [ ] -> backendStdenv.hostRedistSystem == "linux-aarch64"; + } + { + kind = "unsupported"; + message = + "Requested post-Thor (10.1) Jetson CUDA capabilities (${toJSON postThorJetsonCudaCapabilities}) require " + + "computed NVIDIA hostRedistSystem (${backendStdenv.hostRedistSystem}) to be linux-sbsa"; + assertion = postThorJetsonCudaCapabilities != [ ] -> backendStdenv.hostRedistSystem == "linux-sbsa"; + } + ]; }; }) diff --git a/pkgs/development/cuda-modules/packages/libnvshmem.nix b/pkgs/development/cuda-modules/packages/libnvshmem.nix index 0f92bcee64323..b9503cb13725f 100644 --- a/pkgs/development/cuda-modules/packages/libnvshmem.nix +++ b/pkgs/development/cuda-modules/packages/libnvshmem.nix @@ -179,7 +179,7 @@ backendStdenv.mkDerivation (finalAttrs: { meta = { description = "Parallel programming interface for NVIDIA GPUs based on OpenSHMEM"; homepage = "https://github.com/NVIDIA/nvshmem"; - broken = _cuda.lib._mkMetaBroken finalAttrs; + broken = _cuda.lib._hasProblemKind "broken" finalAttrs; # NOTE: There are many licenses: # https://github.com/NVIDIA/nvshmem/blob/7dd48c9fd7aa2134264400802881269b7822bd2f/License.txt license = licenses.nvidiaCudaRedist; diff --git a/pkgs/development/cuda-modules/packages/nccl-tests.nix b/pkgs/development/cuda-modules/packages/nccl-tests.nix index 4a7528a10356a..b6964155e04dc 100644 --- a/pkgs/development/cuda-modules/packages/nccl-tests.nix +++ b/pkgs/development/cuda-modules/packages/nccl-tests.nix @@ -18,7 +18,7 @@ which, }: let - inherit (_cuda.lib) _mkMetaBroken; + inherit (_cuda.lib) _hasProblemKind; inherit (lib) licenses 
maintainers teams; inherit (lib.attrsets) getBin; inherit (lib.lists) optionals; @@ -107,7 +107,7 @@ backendStdenv.mkDerivation (finalAttrs: { "x86_64-linux" ]; license = licenses.bsd3; - broken = _mkMetaBroken finalAttrs; + broken = _hasProblemKind "broken" finalAttrs; maintainers = with maintainers; [ jmillerpdt ]; teams = [ teams.cuda ]; }; diff --git a/pkgs/development/cuda-modules/packages/tests/redists-installed.nix b/pkgs/development/cuda-modules/packages/tests/redists-installed.nix index 6a0ee88ae8835..010faf8eae401 100644 --- a/pkgs/development/cuda-modules/packages/tests/redists-installed.nix +++ b/pkgs/development/cuda-modules/packages/tests/redists-installed.nix @@ -49,7 +49,7 @@ linkedWithoutLicenses.overrideAttrs ( }; meta = prevAttrs.meta or { } // { - broken = _cuda.lib._mkMetaBroken finalAttrs; + broken = _cuda.lib._hasProblemKind "broken" finalAttrs; license = lib.unique ( lib.concatMap (drv: lib.toList (drv.meta.license or [ ])) ( lib.attrValues availableRedistsForPlatform diff --git a/pkgs/development/python-modules/bitsandbytes/default.nix b/pkgs/development/python-modules/bitsandbytes/default.nix index 125386dec9b65..400b799d03502 100644 --- a/pkgs/development/python-modules/bitsandbytes/default.nix +++ b/pkgs/development/python-modules/bitsandbytes/default.nix @@ -16,6 +16,7 @@ scipy, trove-classifiers, + _cuda, # NOTE: (ab)using cudaPackages' internal lib; unstable interfaces! cudaSupport ? torch.cudaSupport, cudaPackages ? torch.cudaPackages, rocmSupport ? 
torch.rocmSupport, @@ -28,10 +29,24 @@ let pname = "bitsandbytes"; version = "0.48.1"; - brokenConditions = lib.attrsets.filterAttrs (_: cond: cond) { - "CUDA and ROCm are mutually exclusive" = cudaSupport && rocmSupport; - "CUDA is not targeting Linux" = cudaSupport && !stdenv.hostPlatform.isLinux; + fakeFinalAttrs = { + meta = { + inherit problems; + }; + finalPackage.stdenv = stdenv; }; + problems = _cuda.lib._mkMetaProblems [ + { + kind = "unsupported"; + message = "CUDA and ROCm are mutually exclusive"; + assertion = !(cudaSupport && rocmSupport); + } + { + kind = "unsupported"; + message = "Nixpkgs only supports CUDA on Linux"; + assertion = !(cudaSupport && !stdenv.hostPlatform.isLinux); + } + ]; inherit (cudaPackages) cudaMajorMinorVersion; rocmMajorMinorVersion = lib.versions.majorMinor rocmPackages.rocm-core.version; @@ -187,7 +202,6 @@ buildPythonPackage { cudaPackages rocmSupport rocmPackages - brokenConditions # To help debug when a package is broken due to CUDA support ; }; @@ -200,5 +214,12 @@ buildPythonPackage { bcdarwin jk ]; + + inherit problems; + + # Internal helpers from cudaPackages; they expect `finalAttrs`, which are + # unsupported by pythonXPackages at the time of writing, so `fakeFinalAttrs` stands in + badPlatforms = _cuda.lib._mkMetaBadPlatforms fakeFinalAttrs; + broken = _cuda.lib._hasProblemKind "broken" fakeFinalAttrs; }; } diff --git a/pkgs/development/python-modules/torch/source/default.nix b/pkgs/development/python-modules/torch/source/default.nix index 6e333a4f13c60..ecb3918c7a2c3 100644 --- a/pkgs/development/python-modules/torch/source/default.nix +++ b/pkgs/development/python-modules/torch/source/default.nix @@ -10,6 +10,7 @@ runCommand, writeShellScript, config, + _cuda, # NOTE: (ab)using cudaPackages' internal lib; unstable interfaces! cudaSupport ?
config.cudaSupport, cudaPackages, autoAddDriverRunpath, @@ -241,24 +242,49 @@ let ''; }; - brokenConditions = attrsets.filterAttrs (_: cond: cond) { - "CUDA and ROCm are mutually exclusive" = cudaSupport && rocmSupport; - "CUDA is not targeting Linux" = cudaSupport && !stdenv.hostPlatform.isLinux; - "Unsupported CUDA version" = - cudaSupport - && !(builtins.elem cudaPackages.cudaMajorVersion [ - "11" - "12" - ]); - "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = - MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit); + fakeFinalAttrs = { + meta = { + inherit problems; + }; + finalPackage.stdenv = stdenv'; + }; + problems = _cuda.lib._mkMetaProblems [ + { + message = "CUDA and ROCm are mutually exclusive"; + assertion = !(cudaSupport && rocmSupport); + kind = "unsupported"; + } + { + message = "Nixpkgs only supports CUDA on Linux"; + assertion = cudaSupport -> stdenv.hostPlatform.isLinux; + kind = "broken"; + } + { + message = "CUDA version unsupported upstream"; + assertion = + cudaSupport + -> (builtins.elem cudaPackages.cudaMajorVersion [ + "11" + "12" + ]); + kind = "unsupported"; + } + { + message = "MPI cudatoolkit does not match cudaPackages.cudatoolkit"; + assertion = (MPISupport && cudaSupport) -> (mpi.cudatoolkit == cudaPackages.cudatoolkit); # only checked when MPI and CUDA are both enabled, as in the removed brokenConditions entry + kind = "broken"; + } # This used to be a deep package set comparison between cudaPackages and # effectiveMagma.cudaPackages, making torch too strict in cudaPackages.
# In particular, this triggered warnings from cuda's `aliases.nix` - "Magma cudaPackages does not match cudaPackages" = - cudaSupport - && (effectiveMagma.cudaPackages.cudaMajorMinorVersion != cudaPackages.cudaMajorMinorVersion); - }; + { + message = "Magma cudaPackages does not match cudaPackages"; + assertion = + cudaSupport + -> (effectiveMagma.cudaPackages.cudaMajorMinorVersion == cudaPackages.cudaMajorMinorVersion); + kind = "broken"; + } + ]; unroll-src = writeShellScript "unroll-src" '' echo "{ @@ -760,8 +786,6 @@ buildPythonPackage.override { inherit stdenv; } rec { cudaCapabilities = if cudaSupport then supportedCudaCapabilities else [ ]; # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability. blasProvider = blas.provider; - # To help debug when a package is broken due to CUDA support - inherit brokenConditions; tests = callPackage ../tests { }; }; @@ -777,8 +801,11 @@ buildPythonPackage.override { inherit stdenv; } rec { thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds + + inherit problems; platforms = lib.platforms.linux ++ lib.optionals (!cudaSupport && !rocmSupport) lib.platforms.darwin; - broken = builtins.any trivial.id (builtins.attrValues brokenConditions); + broken = _cuda.lib._hasProblemKind "broken" fakeFinalAttrs; + badPlatforms = _cuda.lib._mkMetaBadPlatforms fakeFinalAttrs; }; }