diff --git a/pkgs/development/compilers/gcc/common/builder.nix b/pkgs/development/compilers/gcc/common/builder.nix
index 98525b5e237ef..25c5646338653 100644
--- a/pkgs/development/compilers/gcc/common/builder.nix
+++ b/pkgs/development/compilers/gcc/common/builder.nix
@@ -1,6 +1,7 @@
 { lib
 , stdenv
 , enableMultilib
+, targetConfig
 }:
 
 let
@@ -196,6 +197,13 @@ originalAttrs: (stdenv.mkDerivation (finalAttrs: originalAttrs // {
     mkdir -p "$out/''${targetConfig}/lib"
     mkdir -p "''${!outputLib}/''${targetConfig}/lib"
   '' +
+  # if cross-compiling, link from $lib/lib to $lib/${targetConfig}/lib.
+  # since native-compiles have $lib/lib as a directory (not a
+  # symlink), this ensures that in every case we can assume that
+  # $lib/lib contains the .so files
+  lib.optionalString (with stdenv; targetPlatform.config != hostPlatform.config) ''
+    ln -Ts "''${!outputLib}/''${targetConfig}/lib" $lib/lib
+  '' +
   # Make `lib64` symlinks to `lib`.
   lib.optionalString (!enableMultilib && stdenv.hostPlatform.is64bit && !stdenv.hostPlatform.isMips64n32) ''
     ln -s lib "$out/''${targetConfig}/lib64"
diff --git a/pkgs/development/compilers/gcc/common/libgcc.nix b/pkgs/development/compilers/gcc/common/libgcc.nix
index c8342ae90054a..a7de840adc8d0 100644
--- a/pkgs/development/compilers/gcc/common/libgcc.nix
+++ b/pkgs/development/compilers/gcc/common/libgcc.nix
@@ -83,10 +83,6 @@ in
     lib.optionalString (!langC) ''
       rm -f $out/lib/libgcc_s.so*
     ''
-    + lib.optionalString (hostPlatform != targetPlatform) ''
-      mkdir -p $lib/lib/
-      ln -s ${targetPlatformSlash}lib $lib/lib
-    ''
 
     # TODO(amjoseph): remove the `libgcc_s.so` symlinks below and replace them
     # with a `-L${gccForLibs.libgcc}/lib` in cc-wrapper's
diff --git a/pkgs/development/compilers/gcc/default.nix b/pkgs/development/compilers/gcc/default.nix
index cc3546bed22cf..0144ab4cfff9b 100644
--- a/pkgs/development/compilers/gcc/default.nix
+++ b/pkgs/development/compilers/gcc/default.nix
@@ -103,6 +103,7 @@ let inherit version;
     disableBootstrap = atLeast11 && !stdenv.hostPlatform.isDarwin && (atLeast12 -> !profiledCompiler);
 
     inherit (stdenv) buildPlatform hostPlatform targetPlatform;
+    targetConfig = if targetPlatform != hostPlatform then targetPlatform.config else null;
 
     patches = callFile ./patches {};
 
@@ -124,6 +125,7 @@ let inherit version;
         buildPlatform
         hostPlatform
         targetPlatform
+        targetConfig
         patches
         crossMingw
         stageNameAddon
@@ -329,7 +331,7 @@ lib.pipe ((callFile ./common/builder.nix {}) ({
     ++ optional (is7 && targetPlatform.isAarch64) "--enable-fix-cortex-a53-843419"
     ++ optional (is7 && targetPlatform.isNetBSD) "--disable-libcilkrts";
 
-  targetConfig = if targetPlatform != hostPlatform then targetPlatform.config else null;
+  inherit targetConfig;
 
   buildFlags =
     # we do not yet have Nix-driven profiling
diff --git a/pkgs/development/cuda-modules/backend-stdenv.nix b/pkgs/development/cuda-modules/backend-stdenv.nix
index bcca7118b163b..32386ffbdd4c9 100644
--- a/pkgs/development/cuda-modules/backend-stdenv.nix
+++ b/pkgs/development/cuda-modules/backend-stdenv.nix
@@ -3,10 +3,8 @@
   nvccCompatibilities,
   cudaVersion,
   pkgs,
-  overrideCC,
   stdenv,
-  wrapCCWith,
-  stdenvAdapters,
+  stdenvAdapters
 }:
 
 let
diff --git a/pkgs/development/cuda-modules/cuda-library-samples/extension.nix b/pkgs/development/cuda-modules/cuda-library-samples/extension.nix
index 4cb34af732095..9092a653bd5e9 100644
--- a/pkgs/development/cuda-modules/cuda-library-samples/extension.nix
+++ b/pkgs/development/cuda-modules/cuda-library-samples/extension.nix
@@ -1,8 +1,10 @@
-{hostPlatform, lib}:
+{backendStdenv, lib}:
 let
+  inherit (backendStdenv.hostPlatform) isx86_64 isLinux;
+
   # Samples are built around the CUDA Toolkit, which is not available for
   # aarch64. Check for both CUDA version and platform.
-  platformIsSupported = hostPlatform.isx86_64 && hostPlatform.isLinux;
+  platformIsSupported = isx86_64 && isLinux;
 
   # Build our extension
   extension =
diff --git a/pkgs/development/cuda-modules/cuda-library-samples/generic.nix b/pkgs/development/cuda-modules/cuda-library-samples/generic.nix
index d4182536654e1..3c080c8a9c382 100644
--- a/pkgs/development/cuda-modules/cuda-library-samples/generic.nix
+++ b/pkgs/development/cuda-modules/cuda-library-samples/generic.nix
@@ -76,7 +76,7 @@ in
       # CUTENSOR_ROOT is double escaped
       postPatch = ''
         substituteInPlace CMakeLists.txt \
-          --replace "\''${CUTENSOR_ROOT}/include" "${cutensor.dev}/include"
+          --replace-fail "\''${CUTENSOR_ROOT}/include" "${cutensor.dev}/include"
       '';
 
       CUTENSOR_ROOT = cutensor;
diff --git a/pkgs/development/cuda-modules/cuda-samples/extension.nix b/pkgs/development/cuda-modules/cuda-samples/extension.nix
index d41da90cd5d0e..90a124f80fa73 100644
--- a/pkgs/development/cuda-modules/cuda-samples/extension.nix
+++ b/pkgs/development/cuda-modules/cuda-samples/extension.nix
@@ -1,6 +1,6 @@
 {
+  backendStdenv,
   cudaVersion,
-  hostPlatform,
   lib,
 }:
 let
@@ -26,7 +26,7 @@ let
   # Samples are built around the CUDA Toolkit, which is not available for
   # aarch64. Check for both CUDA version and platform.
   cudaVersionIsSupported = cudaVersionToHash ? ${cudaVersion};
-  platformIsSupported = hostPlatform.isx86_64;
+  platformIsSupported = backendStdenv.hostPlatform.isx86_64;
   isSupported = cudaVersionIsSupported && platformIsSupported;
 
   # Build our extension
diff --git a/pkgs/development/cuda-modules/cuda-samples/generic.nix b/pkgs/development/cuda-modules/cuda-samples/generic.nix
index 3d1dac015e16c..e2a33cd7839c9 100644
--- a/pkgs/development/cuda-modules/cuda-samples/generic.nix
+++ b/pkgs/development/cuda-modules/cuda-samples/generic.nix
@@ -14,6 +14,7 @@
 }:
 let
   inherit (lib) lists strings;
+  inherit (backendStdenv.hostPlatform.parsed) cpu kernel;
 in
 backendStdenv.mkDerivation (
   finalAttrs: {
@@ -64,7 +65,7 @@ backendStdenv.mkDerivation (
     installPhase = ''
       runHook preInstall
 
-      install -Dm755 -t $out/bin bin/${backendStdenv.hostPlatform.parsed.cpu.name}/${backendStdenv.hostPlatform.parsed.kernel.name}/release/*
+      install -Dm755 -t $out/bin bin/${cpu.name}/${kernel.name}/release/*
 
       runHook postInstall
     '';
diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix
index f43d649afbbf3..fe902e68f4000 100644
--- a/pkgs/development/cuda-modules/cuda/overrides.nix
+++ b/pkgs/development/cuda-modules/cuda/overrides.nix
@@ -1,53 +1,105 @@
-{cudaVersion, lib, addDriverRunpath}:
 let
-  inherit (lib) attrsets lists strings;
-  # cudaVersionOlder : Version -> Boolean
-  cudaVersionOlder = strings.versionOlder cudaVersion;
-  # cudaVersionAtLeast : Version -> Boolean
-  cudaVersionAtLeast = strings.versionAtLeast cudaVersion;
+  filterAndCreateOverrides =
+    createOverrideAttrs: final: prev:
+    let
+      # It is imperative that we use `final.callPackage` to create these overrides
+      # as it allows us access to the spliced package sets.
+      inherit (final) callPackage;
+
+      # NOTE(@connorbaker): We MUST use `lib` from `prev` because the attribute
+      # names CAN NOT depend on `final`.
+      inherit (prev.lib.attrsets) filterAttrs mapAttrs;
+      inherit (prev.lib.trivial) pipe;
+
+      # NOTE: Filter out attributes that are not present in the previous version of
+      # the package set. This is necessary to prevent the appearance of attributes
+      # like `cuda_nvcc` in `cudaPackages_10_0`, which predates redistributables.
+      filterOutNewAttrs = filterAttrs (name: _: prev ? ${name});
 
-  addBuildInputs =
-    drv: buildInputs:
-    drv.overrideAttrs (prevAttrs: {buildInputs = prevAttrs.buildInputs ++ buildInputs;});
+      # NOTE: It is imperative that we use `final.callPackage` to perform overrides,
+      # as it allows us access to the spliced package sets.
+      # Apply callPackage to each attribute value, yielding a value to be passed
+      # to overrideAttrs.
+      callPackageThenOverrideAttrs = mapAttrs (
+        name: value: prev.${name}.overrideAttrs (callPackage value { })
+      );
+    in
+    pipe createOverrideAttrs [
+      filterOutNewAttrs
+      callPackageThenOverrideAttrs
+    ];
 in
-# NOTE: Filter out attributes that are not present in the previous version of
-# the package set. This is necessary to prevent the appearance of attributes
-# like `cuda_nvcc` in `cudaPackages_10_0, which predates redistributables.
-final: prev:
-attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) {
-  libcufile = prev.libcufile.overrideAttrs (
+# Each attribute name is the name of an existing package in the previous version
+# of the package set.
+# The value is a function (to be provided to callPackage), which yields a value
+# to be provided to overrideAttrs. This allows us to override the attributes of
+# a package without losing access to the fixed point of the package set --
+# especially useful given that some packages may depend on each other!
+filterAndCreateOverrides {
+  libcufile =
+    {
+      cudaOlder,
+      lib,
+      libcublas,
+      numactl,
+      rdma-core,
+    }:
     prevAttrs: {
       buildInputs = prevAttrs.buildInputs ++ [
-        final.libcublas.lib
-        final.pkgs.numactl
-        final.pkgs.rdma-core
+        libcublas.lib
+        numactl
+        rdma-core
       ];
       # Before 11.7 libcufile depends on itself for some reason.
       autoPatchelfIgnoreMissingDeps =
         prevAttrs.autoPatchelfIgnoreMissingDeps
-        ++ lists.optionals (cudaVersionOlder "11.7") [ "libcufile.so.0" ];
-    }
-  );
+        ++ lib.lists.optionals (cudaOlder "11.7") [ "libcufile.so.0" ];
+    };
 
-  libcusolver = addBuildInputs prev.libcusolver (
-    # Always depends on this
-    [final.libcublas.lib]
-    # Dependency from 12.0 and on
-    ++ lists.optionals (cudaVersionAtLeast "12.0") [final.libnvjitlink.lib]
-    # Dependency from 12.1 and on
-    ++ lists.optionals (cudaVersionAtLeast "12.1") [final.libcusparse.lib]
-  );
+  libcusolver =
+    {
+      cudaAtLeast,
+      lib,
+      libcublas,
+      libcusparse ? null,
+      libnvjitlink ? null,
+    }:
+    prevAttrs: {
+      buildInputs =
+        prevAttrs.buildInputs
+        # Always depends on this
+        ++ [ libcublas.lib ]
+        # Dependency from 12.0 and on
+        ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ]
+        # Dependency from 12.1 and on
+        ++ lib.lists.optionals (cudaAtLeast "12.1") [ libcusparse.lib ];
+    };
 
-  libcusparse = addBuildInputs prev.libcusparse (
-    lists.optionals (cudaVersionAtLeast "12.0") [final.libnvjitlink.lib]
-  );
+  libcusparse =
+    {
+      cudaAtLeast,
+      lib,
+      libnvjitlink ? null,
+    }:
+    prevAttrs: {
+      buildInputs =
+        prevAttrs.buildInputs
+        # Dependency from 12.0 and on
+        ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ];
+    };
 
-  cuda_cudart = prev.cuda_cudart.overrideAttrs (
+  # TODO(@connorbaker): cuda_cudart.dev depends on crt/host_config.h, which is from
+  # cuda_nvcc.dev. It would be nice to be able to encode that.
+  cuda_cudart =
+    { addDriverRunpath, lib }:
+    let
+      inherit (addDriverRunpath.__spliced.buildHost or addDriverRunpath) driverLink;
+    in
     prevAttrs: {
       # Remove once cuda-find-redist-features has a special case for libcuda
       outputs =
         prevAttrs.outputs
-        ++ lists.optionals (!(builtins.elem "stubs" prevAttrs.outputs)) [ "stubs" ];
+        ++ lib.lists.optionals (!(builtins.elem "stubs" prevAttrs.outputs)) [ "stubs" ];
 
       allowFHSReferences = false;
 
@@ -58,7 +110,7 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) {
           while IFS= read -r -d $'\0' path ; do
             sed -i \
               -e "s|^libdir\s*=.*/lib\$|libdir=''${!outputLib}/lib/stubs|" \
-              -e "s|^Libs\s*:\(.*\)\$|Libs: \1 -Wl,-rpath,${addDriverRunpath.driverLink}/lib|" \
+              -e "s|^Libs\s*:\(.*\)\$|Libs: \1 -Wl,-rpath,${driverLink}/lib|" \
               "$path"
           done < <(find -iname 'cuda-*.pc' -print0)
         ''
@@ -77,10 +129,10 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) {
           ln -s "$stubs"/lib/stubs/* "$stubs"/lib/
           ln -s "$stubs"/lib/stubs "''${!outputLib}/lib/stubs"
         '';
-    }
-  );
+    };
 
-  cuda_compat = prev.cuda_compat.overrideAttrs (
+  cuda_compat =
+    { flags, lib }:
     prevAttrs: {
       autoPatchelfIgnoreMissingDeps = prevAttrs.autoPatchelfIgnoreMissingDeps ++ [
         "libnvrm_gpu.so"
@@ -89,34 +141,41 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) {
       ];
       # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices.
       badPlatformsConditions = prevAttrs.badPlatformsConditions // {
-        "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" =
-          !final.flags.isJetsonBuild;
+        "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" = !flags.isJetsonBuild;
+      };
+      meta = prevAttrs.meta // {
+        # For cross-compilation, we need the hostPlatform to be included in order to fetch and build the package. This
+        # doesn't change the fact that it won't work on non-Jetson devices, so we only add it when building for Jetson.
+        platforms = prevAttrs.meta.platforms ++ lib.lists.optionals flags.isJetsonBuild [ "x86_64-linux" ];
       };
-    }
-  );
+    };
 
-  cuda_gdb = addBuildInputs prev.cuda_gdb (
-    # x86_64 only needs gmp from 12.0 and on
-    lists.optionals (cudaVersionAtLeast "12.0") [final.pkgs.gmp]
-  );
+  cuda_gdb =
+    {
+      cudaAtLeast,
+      gmp,
+      lib,
+    }:
+    prevAttrs: {
+      buildInputs =
+        prevAttrs.buildInputs
+        # x86_64 only needs gmp from 12.0 and on
+        ++ lib.lists.optionals (cudaAtLeast "12.0") [ gmp ];
+    };
 
-  cuda_nvcc = prev.cuda_nvcc.overrideAttrs (
-    oldAttrs:
+  cuda_nvcc =
+    {
+      backendStdenv,
+      cuda_cudart,
+      lib,
+      setupCudaHook,
+    }:
     let
-      # This replicates the logic in stdenvAdapters.useLibsFrom, except we use
-      # gcc from pkgsHostTarget and not from buildPackages.
-      ccForLibs-wrapper = final.pkgs.stdenv.cc;
-      gccMajorVersion = final.nvccCompatibilities.${cudaVersion}.gccMaxMajorVersion;
-      cc = final.pkgs.wrapCCWith {
-        cc = final.pkgs."gcc${gccMajorVersion}".cc;
-        useCcForLibs = true;
-        gccForLibs = ccForLibs-wrapper.cc;
-      };
+      # CC must come from the host environment, not the target environment because it is
+      # used at build time.
+      inherit (backendStdenv.__spliced.buildHost or backendStdenv) cc;
     in
-    {
-
-      outputs = oldAttrs.outputs ++ lists.optionals (!(builtins.elem "lib" oldAttrs.outputs)) [ "lib" ];
-
+    prevAttrs: {
       # Patch the nvcc.profile.
       # Syntax:
       # - `=` for assignment,
@@ -131,38 +190,40 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) {
       # backend-stdenv.nix
 
       postPatch =
-        (oldAttrs.postPatch or "")
+        (prevAttrs.postPatch or "")
         + ''
+          echo "Running the cuda_nvcc postPatch"
           substituteInPlace bin/nvcc.profile \
-            --replace \
-              '$(TOP)/lib' \
-              "''${!outputLib}/lib" \
-            --replace \
+            --replace-fail \
               '$(TOP)/$(_NVVM_BRANCH_)' \
               "''${!outputBin}/nvvm" \
-            --replace \
+            --replace-fail \
               '$(TOP)/$(_TARGET_DIR_)/include' \
               "''${!outputDev}/include"
 
           cat << EOF >> bin/nvcc.profile
 
           # Fix a compatible backend compiler
-          PATH += ${lib.getBin cc}/bin:
+          PATH += "${cc}/bin":
 
           # Expose the split-out nvvm
-          LIBRARIES =+ -L''${!outputBin}/nvvm/lib
-          INCLUDES =+ -I''${!outputBin}/nvvm/include
-
-          # Expose cudart and the libcuda stubs
-          LIBRARIES =+ -L$static/lib" "-L${final.cuda_cudart.lib}/lib -L${final.cuda_cudart.lib}/lib/stubs
-          INCLUDES =+ -I${final.cuda_cudart.dev}/include
+          LIBRARIES =+ "-L''${!outputBin}/nvvm/lib"
+          INCLUDES =+ "-I''${!outputBin}/nvvm/include"
           EOF
         '';
 
-      propagatedBuildInputs = [ final.setupCudaHook ];
+      # propagatedNativeBuildInputs = (prevAttrs.propagatedNativeBuildInputs or [ ]) ++ [ cc ];
+
+      # NOTE(@connorbaker):
+      # Though it might seem odd or counter-intuitive to add the setup hook to `propagatedBuildInputs` instead of
+      # `propagatedNativeBuildInputs`, it is necessary! If you move the setup hook from `propagatedBuildInputs` to
+      # `propagatedNativeBuildInputs`, it stops being propagated to downstream packages during their build because
+      # setup hooks in `propagatedNativeBuildInputs` are not designed to affect the runtime or build environment of
+      # dependencies; they are only meant to affect the build environment of the package that directly includes them.
+      propagatedBuildInputs = (prevAttrs.propagatedBuildInputs or [ ]) ++ [ setupCudaHook ];
 
       postInstall =
-        (oldAttrs.postInstall or "")
+        (prevAttrs.postInstall or "")
         + ''
           moveToOutput "nvvm" "''${!outputBin}"
         '';
@@ -170,50 +231,69 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) {
       # The nvcc and cicc binaries contain hard-coded references to /usr
       allowFHSReferences = true;
 
-      meta = (oldAttrs.meta or { }) // {
+      meta = (prevAttrs.meta or { }) // {
         mainProgram = "nvcc";
       };
-    }
-  );
+    };
 
-  cuda_nvprof = prev.cuda_nvprof.overrideAttrs (
-    prevAttrs: {buildInputs = prevAttrs.buildInputs ++ [final.cuda_cupti.lib];}
-  );
+  cuda_nvprof =
+    { cuda_cupti }: prevAttrs: { buildInputs = prevAttrs.buildInputs ++ [ cuda_cupti.lib ]; };
 
-  cuda_demo_suite = addBuildInputs prev.cuda_demo_suite [
-    final.pkgs.freeglut
-    final.pkgs.libGLU
-    final.pkgs.libglvnd
-    final.pkgs.mesa
-    final.libcufft.lib
-    final.libcurand.lib
-  ];
-
-  nsight_compute = prev.nsight_compute.overrideAttrs (
+  cuda_demo_suite =
+    {
+      freeglut,
+      libcufft,
+      libcurand,
+      libGLU,
+      libglvnd,
+      mesa,
+    }:
     prevAttrs: {
-      nativeBuildInputs =
-        prevAttrs.nativeBuildInputs
-        ++ (
-          if (strings.versionOlder prev.nsight_compute.version "2022.2.0") then
-            [final.pkgs.qt5.wrapQtAppsHook]
-          else
-            [final.pkgs.qt6.wrapQtAppsHook]
-        );
-      buildInputs =
-        prevAttrs.buildInputs
-        ++ (
-          if (strings.versionOlder prev.nsight_compute.version "2022.2.0") then
-            [final.pkgs.qt5.qtwebview]
-          else
-            [final.pkgs.qt6.qtwebview]
-        );
-    }
-  );
+      buildInputs = prevAttrs.buildInputs ++ [
+        freeglut
+        libcufft.lib
+        libcurand.lib
+        libGLU
+        libglvnd
+        mesa
+      ];
+    };
 
-  nsight_systems = prev.nsight_systems.overrideAttrs (
+  nsight_compute =
+    {
+      lib,
+      qt5 ? null,
+      qt6 ? null,
+    }:
     prevAttrs:
     let
-      qt = if lib.versionOlder prevAttrs.version "2022.4.2.1" then final.pkgs.qt5 else final.pkgs.qt6;
+      qt = if lib.strings.versionOlder prevAttrs.version "2022.2.0" then qt5 else qt6;
+      inherit (qt) wrapQtAppsHook qtwebview;
+    in
+    {
+      nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ wrapQtAppsHook ];
+      buildInputs = prevAttrs.buildInputs ++ [ qtwebview ];
+    };
+
+  nsight_systems =
+    {
+      cuda_cudart,
+      cudaOlder,
+      gst_all_1,
+      lib,
+      nss,
+      numactl,
+      pulseaudio,
+      qt5 ? null,
+      qt6 ? null,
+      rdma-core,
+      ucx,
+      wayland,
+      xorg,
+    }:
+    prevAttrs:
+    let
+      qt = if lib.strings.versionOlder prevAttrs.version "2022.4.2.1" then qt5 else qt6;
       qtwayland =
         if lib.versions.major qt.qtbase.version == "5" then
           lib.getBin qt.qtwayland
@@ -244,36 +324,34 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) {
         '';
       nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ qt.wrapQtAppsHook ];
       buildInputs = prevAttrs.buildInputs ++ [
-        final.cuda_cudart.stubs
-        final.pkgs.alsa-lib
-        final.pkgs.boost178
-        final.pkgs.e2fsprogs
-        final.pkgs.gst_all_1.gst-plugins-base
-        final.pkgs.gst_all_1.gstreamer
-        final.pkgs.nss
-        final.pkgs.numactl
-        final.pkgs.pulseaudio
-        final.pkgs.rdma-core
-        final.pkgs.ucx
-        final.pkgs.wayland
-        final.pkgs.xorg.libXcursor
-        final.pkgs.xorg.libXdamage
-        final.pkgs.xorg.libXrandr
-        final.pkgs.xorg.libXtst
-        qt.qtbase
         (qt.qtdeclarative or qt.full)
         (qt.qtsvg or qt.full)
+        cuda_cudart.stubs
+        gst_all_1.gst-plugins-base
+        gst_all_1.gstreamer
+        nss
+        numactl
+        pulseaudio
+        qt.qtbase
         qtWaylandPlugins
+        rdma-core
+        ucx
+        wayland
+        xorg.libXcursor
+        xorg.libXdamage
+        xorg.libXrandr
+        xorg.libXtst
       ];
 
       # Older releases require boost 1.70 deprecated in Nixpkgs
-      meta.broken = prevAttrs.meta.broken or false || lib.versionOlder final.cudaVersion "11.8";
-    }
-  );
+      meta.broken = prevAttrs.meta.broken or false || cudaOlder "11.8";
+    };
 
-  nvidia_driver = prev.nvidia_driver.overrideAttrs {
-    # No need to support this package as we have drivers already
-    # in linuxPackages.
-    meta.broken = true;
-  };
+  nvidia_driver =
+    { }:
+    prevAttrs: {
+      # No need to support this package as we have drivers already
+      # in linuxPackages.
+      meta = prevAttrs.meta // { broken = true; };
+    };
 }
diff --git a/pkgs/development/cuda-modules/cutensor/extension.nix b/pkgs/development/cuda-modules/cutensor/extension.nix
index 534941887c6e4..29959fc013f99 100644
--- a/pkgs/development/cuda-modules/cutensor/extension.nix
+++ b/pkgs/development/cuda-modules/cutensor/extension.nix
@@ -13,9 +13,9 @@
 # - Instead of providing different releases for each version of CUDA, CuTensor has multiple subdirectories in `lib`
 #   -- one for each version of CUDA.
 {
+  backendStdenv,
   cudaVersion,
   flags,
-  hostPlatform,
   lib,
   mkVersionedPackageName,
 }:
@@ -93,7 +93,7 @@ let
   # LibPath are not constant across the same release -- one platform may support fewer
   # CUDA versions than another.
   # redistArch :: String
-  redistArch = flags.getRedistArch hostPlatform.system;
+  redistArch = flags.getRedistArch backendStdenv.hostPlatform.system;
   # platformIsSupported :: Manifests -> Boolean
   platformIsSupported =
     {feature, ...}:
diff --git a/pkgs/development/cuda-modules/flags.nix b/pkgs/development/cuda-modules/flags.nix
index d5e01be01fd51..495ba647feb1b 100644
--- a/pkgs/development/cuda-modules/flags.nix
+++ b/pkgs/development/cuda-modules/flags.nix
@@ -2,12 +2,13 @@
 # Gpu :: AttrSet
 #   - See the documentation in ./gpus.nix.
 {
+  backendStdenv,
   config,
   cudaCapabilities ? (config.cudaCapabilities or []),
   cudaForwardCompat ? (config.cudaForwardCompat or true),
   lib,
+  cudaAtLeast,
   cudaVersion,
-  hostPlatform,
   # gpus :: List Gpu
   gpus,
 }:
@@ -20,6 +21,12 @@ let
     trivial
     ;
 
+  inherit (backendStdenv)
+    buildPlatform
+    hostPlatform
+    targetPlatform
+    ;
+
   # Flags are determined based on your CUDA toolkit by default.  You may benefit
   # from improved performance, reduced file size, or greater hardware support by
   # passing a configuration based on your specific GPU environment.
@@ -42,7 +49,7 @@ let
     gpu:
     let
       inherit (gpu) minCudaVersion maxCudaVersion;
-      lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion;
+      lowerBoundSatisfied = cudaAtLeast minCudaVersion;
       upperBoundSatisfied =
         (maxCudaVersion == null) || !(strings.versionOlder maxCudaVersion cudaVersion);
     in
@@ -216,21 +223,24 @@ let
             lists.filter (cap: !(builtins.elem cap requestedJetsonDevices))
               cudaCapabilities;
           jetsonBuildSufficientCondition = requestedJetsonDevices != [];
-          jetsonBuildNecessaryCondition = requestedNonJetsonDevices == [] && hostPlatform.isAarch64;
+          jetsonBuildNecessaryCondition = requestedNonJetsonDevices == [];
         in
         trivial.throwIf (jetsonBuildSufficientCondition && !jetsonBuildNecessaryCondition)
           ''
-            Jetson devices cannot be targeted with non-Jetson devices. Additionally, they require hostPlatform to be aarch64.
-            You requested ${builtins.toJSON cudaCapabilities} for host platform ${hostPlatform.system}.
+            Jetson devices cannot be targeted with non-Jetson devices. Additionally, host platform
+            and target platform must be aarch64.
+            You requested ${builtins.toJSON cudaCapabilities} for:
+            - Build platform ${buildPlatform.system}
+            - Host platform ${hostPlatform.system}
+            - Target platform ${targetPlatform.system}
             Requested Jetson devices: ${builtins.toJSON requestedJetsonDevices}.
             Requested non-Jetson devices: ${builtins.toJSON requestedNonJetsonDevices}.
-            Exactly one of the following must be true:
-            - All CUDA capabilities belong to Jetson devices and hostPlatform is aarch64.
-            - No CUDA capabilities belong to Jetson devices.
             See ${./gpus.nix} for a list of architectures supported by this version of Nixpkgs.
           ''
           jetsonBuildSufficientCondition
-        && jetsonBuildNecessaryCondition;
+        && jetsonBuildNecessaryCondition
+        && hostPlatform.isAarch64
+        && targetPlatform.isAarch64;
     };
 in
 # When changing names or formats: pause, validate, and update the assert
@@ -277,7 +287,7 @@ assert let
   };
   actualWrapped = (builtins.tryEval (builtins.deepSeq actual actual)).value;
 in
-asserts.assertMsg ((strings.versionAtLeast cudaVersion "11.2") -> (expected == actualWrapped)) ''
+asserts.assertMsg ((cudaAtLeast "11.2") -> (expected == actualWrapped)) ''
   This test should only fail when using a version of CUDA older than 11.2, the first to support
   8.6.
   Expected: ${builtins.toJSON expected}
@@ -300,59 +310,6 @@ asserts.assertMsg (expected == actualWrapped) ''
   Expected: ${builtins.toJSON expected}
   Actual: ${builtins.toJSON actualWrapped}
 '';
-# Check Jetson-only
-assert let
-  expected = {
-    cudaCapabilities = [
-      "6.2"
-      "7.2"
-    ];
-    enableForwardCompat = true;
-
-    archNames = [
-      "Pascal"
-      "Volta"
-    ];
-    realArches = [
-      "sm_62"
-      "sm_72"
-    ];
-    virtualArches = [
-      "compute_62"
-      "compute_72"
-    ];
-    arches = [
-      "sm_62"
-      "sm_72"
-      "compute_72"
-    ];
-
-    gencode = [
-      "-gencode=arch=compute_62,code=sm_62"
-      "-gencode=arch=compute_72,code=sm_72"
-      "-gencode=arch=compute_72,code=compute_72"
-    ];
-    gencodeString = "-gencode=arch=compute_62,code=sm_62 -gencode=arch=compute_72,code=sm_72 -gencode=arch=compute_72,code=compute_72";
-
-    isJetsonBuild = true;
-  };
-  actual = formatCapabilities {
-    cudaCapabilities = [
-      "6.2"
-      "7.2"
-    ];
-  };
-  actualWrapped = (builtins.tryEval (builtins.deepSeq actual actual)).value;
-in
-asserts.assertMsg
-  # We can't do this test unless we're targeting aarch64
-  (hostPlatform.isAarch64 -> (expected == actualWrapped))
-  ''
-    Jetson devices can only be built with other Jetson devices.
-    Both 6.2 and 7.2 are Jetson devices.
-    Expected: ${builtins.toJSON expected}
-    Actual: ${builtins.toJSON actualWrapped}
-  '';
 {
   # formatCapabilities :: { cudaCapabilities: List Capability, enableForwardCompat: Boolean } ->  { ... }
   inherit formatCapabilities;
diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix
index 4f40b7f01dc28..64850841d995b 100644
--- a/pkgs/development/cuda-modules/generic-builders/manifest.nix
+++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix
@@ -10,7 +10,6 @@
   markForCudatoolkitRootHook,
   flags,
   stdenv,
-  hostPlatform,
   # Builder-specific arguments
   # Short package name (e.g., "cuda_cccl")
   # pname : String
@@ -32,7 +31,6 @@ let
   inherit (lib)
     attrsets
     lists
-    meta
     strings
     trivial
     licenses
@@ -40,15 +38,19 @@ let
     sourceTypes
     ;
 
+  inherit (backendStdenv) hostPlatform;
+
   # Get the redist architectures for which package provides distributables.
   # These are used by meta.platforms.
   supportedRedistArchs = builtins.attrNames featureRelease;
-  # redistArch :: String
-  # The redistArch is the name of the architecture for which the redistributable is built.
-  # It is `"unsupported"` if the redistributable is not supported on the target platform.
-  redistArch = flags.getRedistArch hostPlatform.system;
 
-  sourceMatchesHost = flags.getNixSystem redistArch == stdenv.hostPlatform.system;
+  # hostPlatformRedistArch :: String
+  # The hostPlatformRedistArch is the name of the architecture for which the redistributable is built.
+  # It is `"unsupported"` if the redistributable is not supported on the hostPlatform.
+  hostPlatformRedistArch = flags.getRedistArch hostPlatform.system;
+
+  # sourceMatchesHost :: Bool
+  sourceMatchesHost = flags.getNixSystem hostPlatformRedistArch == hostPlatform.system;
 in
 backendStdenv.mkDerivation (
   finalAttrs: {
@@ -76,7 +78,7 @@ backendStdenv.mkDerivation (
           output:
           attrsets.attrByPath
             [
-              redistArch
+              hostPlatformRedistArch
               "outputs"
               output
             ]
@@ -96,12 +98,12 @@ backendStdenv.mkDerivation (
         # NOTE: In the case the redistributable isn't supported on the target platform,
         # we will have `outputs = [ "out" ] ++ possibleOutputs`. This is of note because platforms which
         # aren't supported would otherwise have evaluation errors when trying to access outputs other than `out`.
-        # The alternative would be to have `outputs = [ "out" ]` when`redistArch = "unsupported"`, but that would
+        # The alternative would be to have `outputs = [ "out" ]` when `hostPlatformRedistArch = "unsupported"`, but that would
         # require adding guards throughout the entirety of the CUDA package set to ensure `cudaSupport` is true --
         # recall that OfBorg will evaluate packages marked as broken and that `cudaPackages` will be evaluated with
         # `cudaSupport = false`!
         additionalOutputs =
-          if redistArch == "unsupported"
+          if hostPlatformRedistArch == "unsupported"
           then possibleOutputs
           else builtins.filter hasOutput possibleOutputs;
         # The out output is special -- it's the default output and we always include it.
@@ -133,7 +135,18 @@ backendStdenv.mkDerivation (
     # brokenConditions :: AttrSet Bool
     # Sets `meta.broken = true` if any of the conditions are true.
     # Example: Broken on a specific version of CUDA or when a dependency has a specific version.
-    brokenConditions = { };
+    brokenConditions = {
+      # Unclear how this is handled by Nix internals.
+      "Duplicate entries in outputs" = finalAttrs.outputs != lists.unique finalAttrs.outputs;
+      # Typically this results in the static output being empty, as all libraries are moved
+      # back to the lib output.
+      "lib output follows static output" =
+        let
+          libIndex = lists.findFirstIndex (x: x == "lib") null finalAttrs.outputs;
+          staticIndex = lists.findFirstIndex (x: x == "static") null finalAttrs.outputs;
+        in
+        libIndex != null && staticIndex != null && libIndex > staticIndex;
+    };
 
     # badPlatformsConditions :: AttrSet Bool
     # Sets `meta.badPlatforms = meta.platforms` if any of the conditions are true.
@@ -143,23 +156,22 @@ backendStdenv.mkDerivation (
     };
 
     # src :: Optional Derivation
-    src = trivial.pipe redistArch [
-      # If redistArch doesn't exist in redistribRelease, return null.
-      (redistArch: redistribRelease.${redistArch} or null)
-      # If the release is non-null, fetch the source; otherwise, return null.
-      (trivial.mapNullable (
-        { relative_path, sha256, ... }:
-        fetchurl {
-          url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}";
-          inherit sha256;
-        }
-      ))
-    ];
+    src =
+      trivial.mapNullable
+        (
+          { relative_path, sha256, ... }:
+          fetchurl {
+            url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}";
+            inherit sha256;
+          }
+        )
+        (redistribRelease.${hostPlatformRedistArch} or null);
 
     # Handle the pkg-config files:
     # 1. No FHS
     # 2. Location expected by the pkg-config wrapper
     # 3. Generate unversioned names too
+    # TODO(@connorbaker): Not all packages have a lib or dev output, so we should check for their existence.
     postPatch = ''
       for path in pkg-config pkgconfig ; do
         [[ -d "$path" ]] || continue
@@ -187,23 +199,26 @@ backendStdenv.mkDerivation (
     # We do need some other phases, like configurePhase, so the multiple-output setup hook works.
     dontBuild = true;
 
-    nativeBuildInputs = [
-      autoPatchelfHook
-      # This hook will make sure libcuda can be found
-      # in typically /lib/opengl-driver by adding that
-      # directory to the rpath of all ELF binaries.
-      # Check e.g. with `patchelf --print-rpath path/to/my/binary
-      autoAddDriverRunpath
-      markForCudatoolkitRootHook
-    ]
-    # autoAddCudaCompatRunpath depends on cuda_compat and would cause
-    # infinite recursion if applied to `cuda_compat` itself (beside the fact
-    # that it doesn't make sense in the first place)
-    ++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [
-      # autoAddCudaCompatRunpath must appear AFTER autoAddDriverRunpath.
-      # See its documentation in ./setup-hooks/extension.nix.
-      autoAddCudaCompatRunpath
-    ];
+    nativeBuildInputs =
+      [
+        # To create fat outputs from each component and find a version of `lndir` built for the host platform.
+        lndir
+      ]
+      ++ [
+        # Patchelf is used to fix the rpath of the binaries.
+        autoPatchelfHook
+        # (autoPatchelfHook.__spliced.buildHost or autoPatchelfHook)
+
+        # This hook will make sure libcuda can be found in typically
+        # /lib/opengl-driver by adding that directory to the rpath of all ELF
+        # binaries. Check e.g. with `patchelf --print-rpath path/to/my/binary`
+        autoAddDriverRunpath
+        # (autoAddDriverRunpath.__spliced.buildHost or autoAddDriverRunpath)
+
+        # Mark the CUDA toolkit root directory for the CUDA compatibility libraries
+        markForCudatoolkitRootHook
+        # (markForCudatoolkitRootHook.__spliced.buildHost or markForCudatoolkitRootHook)
+      ];
 
     buildInputs =
       [
@@ -212,6 +227,17 @@ backendStdenv.mkDerivation (
         # nvcc forces us to use an older gcc
         # NB: We don't actually know if this is the right thing to do
         stdenv.cc.cc.lib
+      ]
+      # autoAddCudaCompatRunpath depends on cuda_compat and would cause
+      # infinite recursion if applied to `cuda_compat` itself (beside the fact
+      # that it doesn't make sense in the first place)
+      ++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [
+        # autoAddCudaCompatRunpath must appear AFTER autoAddDriverRunpath.
+        # See its documentation in ./setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix.
+        # NOTE(@connorbaker): If autoAddCudaCompatRunpath is in nativeBuildInputs, it tries to use cuda_compat
+        # from buildPackages, but we need to use the one from pkgs (pkgsHostTarget).
+        # We can either use autoAddCudaCompatRunpath.__spliced.hostTarget or move it to buildInputs.
+        autoAddCudaCompatRunpath
       ];
 
     # Picked up by autoPatchelf
@@ -296,11 +322,14 @@ backendStdenv.mkDerivation (
 
     # For each output, create a symlink to it in the out output.
     # NOTE: We must recreate the out output here, because the setup hook will have deleted it if it was empty.
+    # NOTE: Rely on nativeBuildInputs adding lndir to the path because meta.getExe has no concept of spliced
+    # attributes and will select the hostPlatform variant instead of the buildPlatform variant.
+    # TODO(@connorbaker): This should be removed when https://github.com/NixOS/nixpkgs/issues/271792 is resolved.
     postPatchelf = ''
       mkdir -p "$out"
       for output in $(getAllOutputNames); do
         if [[ "$output" != "out" ]]; then
-          ${meta.getExe lndir} "''${!output}" "$out"
+          lndir "''${!output}" "$out"
         fi
       done
     '';
diff --git a/pkgs/development/cuda-modules/generic-builders/multiplex.nix b/pkgs/development/cuda-modules/generic-builders/multiplex.nix
index f2a9c6840ecd0..1cb6d8462b7dc 100644
--- a/pkgs/development/cuda-modules/generic-builders/multiplex.nix
+++ b/pkgs/development/cuda-modules/generic-builders/multiplex.nix
@@ -1,9 +1,9 @@
 {
   # callPackage-provided arguments
+  backendStdenv,
   lib,
   cudaVersion,
   flags,
-  hostPlatform,
   # Expected to be passed by the caller
   mkVersionedPackageName,
   # pname :: String
@@ -74,7 +74,7 @@ let
   # Get all of the packages for our given platform.
   # redistArch :: String
   # Value is `"unsupported"` if the platform is not supported.
-  redistArch = flags.getRedistArch hostPlatform.system;
+  redistArch = flags.getRedistArch backendStdenv.hostPlatform.system;
 
   preferable =
     p1: p2: (isSupported p2 -> isSupported p1) && (strings.versionAtLeast p1.version p2.version);
diff --git a/pkgs/development/cuda-modules/nccl/default.nix b/pkgs/development/cuda-modules/nccl/default.nix
index e3d10b79386f9..f7a3f78b9bd15 100644
--- a/pkgs/development/cuda-modules/nccl/default.nix
+++ b/pkgs/development/cuda-modules/nccl/default.nix
@@ -35,6 +35,7 @@ backendStdenv.mkDerivation (
     };
 
     strictDeps = true;
+    __structuredAttrs = true;
 
     outputs = [
       "out"
@@ -66,22 +67,20 @@ backendStdenv.mkDerivation (
 
     preConfigure = ''
       patchShebangs ./src/device/generate.py
-      makeFlagsArray+=(
-        "NVCC_GENCODE=${lib.concatStringsSep " " cudaFlags.gencode}"
-      )
     '';
 
+    # NOTE(@connorbaker): When referencing packages, make sure to use the spliced version corresponding to
+    # buildPackages instead of pkgs (the default).
     makeFlags =
-      ["PREFIX=$(out)"]
+      [
+        "PREFIX=$(out)"
+        "NVCC_GENCODE=${lib.concatStringsSep " " cudaFlags.gencode}"
+      ]
       ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [
-        "CUDA_HOME=${cudatoolkit}"
-        "CUDA_LIB=${lib.getLib cudatoolkit}/lib"
-        "CUDA_INC=${lib.getDev cudatoolkit}/include"
+        "CUDA_HOME=${cudatoolkit.__spliced.buildHost or cudatoolkit}"
       ]
       ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [
-        "CUDA_HOME=${cuda_nvcc}"
-        "CUDA_LIB=${lib.getLib cuda_cudart}/lib"
-        "CUDA_INC=${lib.getDev cuda_cudart}/include"
+        "CUDA_HOME=${cuda_nvcc.__spliced.buildHost or cuda_nvcc}"
       ];
 
     enableParallelBuilding = true;
diff --git a/pkgs/development/cuda-modules/saxpy/default.nix b/pkgs/development/cuda-modules/saxpy/default.nix
index bc299dea006f4..b457ee34b0586 100644
--- a/pkgs/development/cuda-modules/saxpy/default.nix
+++ b/pkgs/development/cuda-modules/saxpy/default.nix
@@ -10,13 +10,12 @@ let
     cuda_cccl
     cuda_cudart
     cuda_nvcc
+    cudaAtLeast
+    cudaOlder
     cudatoolkit
-    cudaVersion
     flags
     libcublas
-    setupCudaHook
     ;
-  inherit (lib) getDev getLib getOutput;
 in
 backendStdenv.mkDerivation {
   pname = "saxpy";
@@ -31,21 +30,116 @@ backendStdenv.mkDerivation {
       cmake
       autoAddDriverRunpath
     ]
-    ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit]
-    ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [cuda_nvcc];
-
-  buildInputs =
-    lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit]
-    ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [
-      (getDev libcublas)
-      (getLib libcublas)
-      (getOutput "static" libcublas)
-      cuda_cudart
-    ]
-    ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [cuda_cccl];
+    ++ lib.optionals (cudaOlder "11.4") [ cudatoolkit ]
+    ++ lib.optionals (cudaAtLeast "11.4") [ cuda_nvcc ];
+
+  # buildInputs =
+  #   lib.optionals (cudaOlder "11.4") [ cudatoolkit ]
+  #   ++ lib.optionals (cudaAtLeast "11.4") [
+  #     cuda_cudart
+  #     libcublas
+  #     # libcublas.dev
+  #     # libcublas.lib
+  #   ]
+  #   ++ lib.optionals (cudaAtLeast "12.0") [ cuda_cccl ];
+
+  # TODO: CMake tells us CUDA_HOST_COMPILER is an unused variable; CMAKE_CUDA_HOST_COMPILER is used and we can set it.
+  # TODO: CMake tells us CUDAToolkit_INCLUDE_DIR is an unused variable; CUDAToolkit_INCLUDE_DIRS is used and we can set it.
+  # TODO: What is the difference between CUDA_CUDA_COMPILER and CMAKE_CUDA_HOST_COMPILER, or CUDACXX and CUDAHOSTCXX?
+  # TODO: The CUDA compiler source identification process used by CMake requires building and running a test program. This is not possible in a cross-compilation environment. We can use CMAKE_CUDA_FLAGS_INIT to get around it.
+  # TODO: Why aren't any of these correctly configured by the environment?
+  # TODO: See whether CUDAToolkit_INCLUDE_DIR etc is necessary, or just the LIBRARY_PATH and LD_LIBRARY_PATH.
+  # TODO: /nix/store/j2y057vz3i19yh4zjsan1s3q256q15rd-binutils-2.41/bin/ld: /nix/store/gh1azxmwdisz1q92h1hw20w9l72gwza7-libcublas-aarch64-unknown-linux-gnu-12.2.5.6-lib/lib/libcublas.so: error adding symbols: file in wrong format
+  preConfigure =
+    let
+      inherit (backendStdenv.__spliced.buildHost or backendStdenv) cc; # `__spliced` is absent in native builds; fall back to the plain package
+      ccFullPath = "${cc}/bin/${cc.targetPrefix}c++";
+      ccRoot = "${cc}";
+      nvccBuildHost = cuda_nvcc.__spliced.buildHost or cuda_nvcc; # build->host tools (run during the build)
+      cudartBuildHost = cuda_cudart.__spliced.buildHost or cuda_cudart;
+
+      cudartHostTarget = cuda_cudart.__spliced.hostTarget or cuda_cudart; # host->target libraries (linked into the result)
+      ccclHostTarget = cuda_cccl.__spliced.hostTarget or cuda_cccl;
+      libcublasHostTarget = libcublas.__spliced.hostTarget or libcublas;
+    in
+    # Working (until linker error)
+    # export NVCC_PREPEND_FLAGS+=" -I${cudartHostTarget}/include -I${ccclHostTarget}/include -L${cudartHostTarget}/lib -L${ccclHostTarget}/lib"
+    # export LIBRARY_PATH+="${cudartHostTarget}/lib"
+    # export LD_LIBRARY_PATH+="${cudartHostTarget}/lib"
+    # export CPATH="$CUDAToolkit_INCLUDE_DIRS"
+    #
+    # Ripped from setup-cuda-hook::setupCUDAToolkitCompilers, added logging
+    ''
+      # Name function never needs to have return value checked.
+      # shellcheck disable=SC2155
+
+      for path in "${cudartHostTarget}" "${ccclHostTarget}" "${libcublasHostTarget}" "${nvccBuildHost}"; do
+        if [[ -d "$path" ]]; then
+          echo "Adding $path to CUDAToolkit search path"
+          addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path"
+          echo "CUDAToolkit_ROOT is now $CUDAToolkit_ROOT"
+        else
+          echo "Skipping $path as it is not a directory"
+        fi
+
+        if [[ -d "$path/include" ]]; then
+          echo "Adding $path/include to CUDAToolkit search path"
+          addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIRS "$path/include"
+          echo "CUDAToolkit_INCLUDE_DIRS is now $CUDAToolkit_INCLUDE_DIRS"
+        else
+          echo "Skipping $path/include as it is not a directory"
+        fi
+      done
+
+      export cmakeFlagsArray+=(
+        -DCUDAToolkit_INCLUDE_DIRS="''${CUDAToolkit_INCLUDE_DIRS:-}"
+        -DCUDAToolkit_ROOT="''${CUDAToolkit_ROOT:-}"
+      )
+    ''
+    # Try to export the include dirs to CPATH, replacing the semicolons with colons
+    + ''
+      export CPATH="''${CUDAToolkit_INCLUDE_DIRS//;/:}"
+      echo "CPATH is now $CPATH"
+    ''
+    # Ripped from setup-cuda-hook::setupCUDAToolkitCompilers
+    + ''
+      # Point NVCC at a compatible compiler
+
+      # For CMake-based projects:
+      # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
+      # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html
+      # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html
+
+      export cmakeFlagsArray+=(
+        -DCMAKE_CUDA_HOST_COMPILER="${ccFullPath}"
+      )
+
+      # For non-CMake projects:
+      # We prepend --compiler-bindir to nvcc flags.
+      # Downstream packages can override these, because NVCC
+      # uses the last --compiler-bindir it gets on the command line.
+      # FIXME: this results in "incompatible redefinition" warnings.
+      # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
+      export CUDAHOSTCXX="${ccFullPath}"
+
+      export NVCC_PREPEND_FLAGS+=" --compiler-bindir=${ccRoot}/bin"
+
+      # NOTE: We set -Xfatbin=-compress-all, which reduces the size of the compiled
+      # binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as
+      # the default set of CUDA capabilities we build can regularly cause this to occur (for
+      # example, with Magma).
+      #
+      # @SomeoneSerge: original comment was made by @ConnorBaker in .../cudatoolkit/common.nix
+      export NVCC_PREPEND_FLAGS+=" -Xfatbin=-compress-all"
+    ''
+    # Try to get around compiler initialization via CMAKE_CUDA_FLAGS_INIT
+    + ''
+      export cmakeFlagsArray+=(
+        -DCMAKE_CUDA_FLAGS_INIT="-L${cudartBuildHost}/lib -I${cudartBuildHost}/include"
+      )
+    '';
 
   cmakeFlags = [
-    (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
     (lib.cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
       with flags; lib.concatStringsSep ";" (lib.lists.map dropDot cudaCapabilities)
     ))
@@ -56,6 +150,6 @@ backendStdenv.mkDerivation {
     license = lib.licenses.mit;
     maintainers = lib.teams.cuda.members;
     platforms = lib.platforms.unix;
-    badPlatforms = lib.optionals flags.isJetsonBuild platforms;
+    badPlatforms = lib.optionals (flags.isJetsonBuild && cudaOlder "11.4") platforms;
   };
 }
diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/auto-add-cuda-compat-runpath-hook.sh
similarity index 100%
rename from pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh
rename to pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/auto-add-cuda-compat-runpath-hook.sh
diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix
new file mode 100644
index 0000000000000..8209f02953c3e
--- /dev/null
+++ b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix
@@ -0,0 +1,24 @@
+# autoAddCudaCompatRunpath hook must be added AFTER `setupCudaHook`. Both
+# hooks prepend a path with `libcuda.so` to the `DT_RUNPATH` section of
+# patched elf files, but `cuda_compat` path must take precedence (otherwise,
+# it doesn't have any effect) and thus appear first. Meaning this hook must be
+# executed last.
+{
+  autoFixElfFiles,
+  cuda_compat ? null,
+  flags,
+  lib,
+  makeSetupHook,
+}:
+makeSetupHook
+  {
+    name = "auto-add-cuda-compat-runpath-hook";
+    propagatedBuildInputs = [ autoFixElfFiles ];
+    substitutions.libcudaPath = lib.optionalString flags.isJetsonBuild "${cuda_compat}/compat";
+    meta = {
+      broken = !flags.isJetsonBuild;
+      badPlatforms = lib.optionals (cuda_compat == null) lib.platforms.all;
+      platforms = cuda_compat.meta.platforms or [ ];
+    };
+  }
+  ./auto-add-cuda-compat-runpath-hook.sh
diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook.sh b/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/auto-add-driver-runpath-hook.sh
similarity index 100%
rename from pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook.sh
rename to pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/auto-add-driver-runpath-hook.sh
diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/default.nix
new file mode 100644
index 0000000000000..97d020b2129d5
--- /dev/null
+++ b/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/default.nix
@@ -0,0 +1,14 @@
+{
+  addDriverRunpath,
+  autoFixElfFiles,
+  makeSetupHook,
+}:
+makeSetupHook
+  {
+    name = "auto-add-opengl-runpath-hook";
+    propagatedBuildInputs = [
+      addDriverRunpath
+      autoFixElfFiles
+    ];
+  }
+  ./auto-add-driver-runpath-hook.sh
diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/auto-fix-elf-files-hook.sh
similarity index 97%
rename from pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh
rename to pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/auto-fix-elf-files-hook.sh
index 1d57dfb17a66d..084c14016fc0b 100644
--- a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh
+++ b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/auto-fix-elf-files-hook.sh
@@ -2,7 +2,7 @@
 # List all dynamically linked ELF files in the outputs and apply a generic fix
 # action provided as a parameter (currently used to add the CUDA or the
 # cuda_compat driver to the runpath of binaries)
-echo "Sourcing cuda/fix-elf-files.sh"
+echo "Sourcing auto-fix-elf-files-hook"
 
 # Returns the exit code of patchelf --print-rpath.
 # A return code of 0 (success) means the ELF file has a dynamic section, while
diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/default.nix
new file mode 100644
index 0000000000000..4550dc80edaef
--- /dev/null
+++ b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/default.nix
@@ -0,0 +1,4 @@
+# Helper hook used in both autoAddCudaCompatRunpath and
+# autoAddDriverRunpath that applies a generic patching action to all elf
+# files with a dynamic linking section.
+{ makeSetupHook }: makeSetupHook { name = "auto-fix-elf-files-hook"; } ./auto-fix-elf-files-hook.sh
diff --git a/pkgs/development/cuda-modules/setup-hooks/extension.nix b/pkgs/development/cuda-modules/setup-hooks/extension.nix
index ece70da52b027..57dc92900e77c 100644
--- a/pkgs/development/cuda-modules/setup-hooks/extension.nix
+++ b/pkgs/development/cuda-modules/setup-hooks/extension.nix
@@ -1,94 +1,11 @@
 final: _: {
-  # Helper hook used in both autoAddCudaCompatRunpath and
-  # autoAddDriverRunpath that applies a generic patching action to all elf
-  # files with a dynamic linking section.
-  autoFixElfFiles =
-    final.callPackage
-      (
-        {makeSetupHook}:
-         makeSetupHook
-          {
-            name = "auto-fix-elf-files";
-          }
-          ./auto-fix-elf-files.sh
-      )
-      {};
-
-  # Internal hook, used by cudatoolkit and cuda redist packages
-  # to accommodate automatic CUDAToolkit_ROOT construction
-  markForCudatoolkitRootHook =
-    final.callPackage
-      (
-        {makeSetupHook}:
-        makeSetupHook {name = "mark-for-cudatoolkit-root-hook";} ./mark-for-cudatoolkit-root-hook.sh
-      )
-      {};
-
-  # Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly
-  setupCudaHook =
-    (final.callPackage
-      (
-        {makeSetupHook, backendStdenv}:
-        makeSetupHook
-          {
-            name = "setup-cuda-hook";
-
-            substitutions.setupCudaHook = placeholder "out";
-
-            # Point NVCC at a compatible compiler
-            substitutions.ccRoot = "${backendStdenv.cc}";
-
-            # Required in addition to ccRoot as otherwise bin/gcc is looked up
-            # when building CMakeCUDACompilerId.cu
-            substitutions.ccFullPath = "${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++";
-          }
-          ./setup-cuda-hook.sh
-      )
-      {}
-    );
-
-  autoAddDriverRunpath =
-    final.callPackage
-      (
-        {addDriverRunpath, autoFixElfFiles, makeSetupHook}:
-        makeSetupHook
-          {
-            name = "auto-add-opengl-runpath-hook";
-            propagatedBuildInputs = [addDriverRunpath autoFixElfFiles];
-          }
-          ./auto-add-driver-runpath-hook.sh
-      )
-      {};
+  autoAddCudaCompatRunpath = final.callPackage ./auto-add-cuda-compat-runpath-hook { };
+  autoAddDriverRunpath = final.callPackage ./auto-add-driver-runpath-hook { };
+  autoFixElfFiles = final.callPackage ./auto-fix-elf-files-hook { };
+  markForCudatoolkitRootHook = final.callPackage ./mark-for-cudatoolkit-root-hook { };
+  setupCudaHook = final.callPackage ./setup-cuda-hook { };
 
+  # Aliases
   # Deprecated: an alias kept for compatibility. Consider removing after 24.11
   autoAddOpenGLRunpathHook = final.autoAddDriverRunpath;
-
-  # autoAddCudaCompatRunpath hook must be added AFTER `setupCudaHook`. Both
-  # hooks prepend a path with `libcuda.so` to the `DT_RUNPATH` section of
-  # patched elf files, but `cuda_compat` path must take precedence (otherwise,
-  # it doesn't have any effect) and thus appear first. Meaning this hook must be
-  # executed last.
-  autoAddCudaCompatRunpath =
-    final.callPackage
-      (
-        {makeSetupHook, autoFixElfFiles, cuda_compat ? null }:
-        makeSetupHook
-          {
-            name = "auto-add-cuda-compat-runpath-hook";
-            propagatedBuildInputs = [autoFixElfFiles];
-
-            substitutions = {
-              # Hotfix Ofborg evaluation
-              libcudaPath = if final.flags.isJetsonBuild then "${cuda_compat}/compat" else null;
-            };
-
-            meta.broken = !final.flags.isJetsonBuild;
-
-            # Pre-cuda_compat CUDA release:
-            meta.badPlatforms = final.lib.optionals (cuda_compat == null) final.lib.platforms.all;
-            meta.platforms = cuda_compat.meta.platforms or [ ];
-          }
-          ./auto-add-cuda-compat-runpath.sh
-      )
-      {};
 }
diff --git a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook.sh
deleted file mode 100644
index ba04c2e0806af..0000000000000
--- a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-# shellcheck shell=bash
-
-# Should we mimick cc-wrapper's "hygiene"?
-[[ -z ${strictDeps-} ]] || (( "$hostOffset" < 0 )) || return 0
-
-echo "Sourcing mark-for-cudatoolkit-root-hook" >&2
-
-markForCUDAToolkit_ROOT() {
-    mkdir -p "${prefix}/nix-support"
-    [[ -f "${prefix}/nix-support/include-in-cudatoolkit-root" ]] && return
-    echo "$pname-$output" > "${prefix}/nix-support/include-in-cudatoolkit-root"
-}
-
-fixupOutputHooks+=(markForCUDAToolkit_ROOT)
diff --git a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/default.nix
new file mode 100644
index 0000000000000..86ff28d6c41a1
--- /dev/null
+++ b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/default.nix
@@ -0,0 +1,4 @@
+# Internal hook, used by cudatoolkit and cuda redist packages
+# to accommodate automatic CUDAToolkit_ROOT construction
+{ makeSetupHook }:
+makeSetupHook { name = "mark-for-cudatoolkit-root-hook"; } ./mark-for-cudatoolkit-root-hook.sh
diff --git a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh
new file mode 100644
index 0000000000000..67c4f5ecf51b0
--- /dev/null
+++ b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh
@@ -0,0 +1,65 @@
+# shellcheck shell=bash
+
+# Guard helper function
+# Returns 0 (success) if the hook should be run, 1 (failure) otherwise.
+# This allows us to use short-circuit evaluation to avoid running the hook when it shouldn't be.
+markForCUDAToolkit_ROOTGuard() {
+    local -i hostOffset=${hostOffset:?}
+    local -i targetOffset=${targetOffset:?}
+    local fnName="mark-for-cudatoolkit-root-hook::markForCUDAToolkit_ROOTGuard hostOffset=$hostOffset targetOffset=$targetOffset"
+    local guard=Skipping
+    local reason="because the hook is not in nativeBuildInputs relative to the package being built"
+
+    # This hook is meant only to add a stub file to the nix-support directory of the package including it in its
+    # nativeBuildInputs, so that the setup hook propagated by cuda_nvcc, setup-cuda-hook, can detect it and add the
+    # package to the CUDA toolkit root. Therefore, since it only modifies the package being built and will not be
+    # propagated, it should only ever be included in nativeBuildInputs.
+    if (( hostOffset == -1 && targetOffset == 0)); then
+        guard=Sourcing
+        reason="because the hook is in nativeBuildInputs relative to the package being built"
+    fi
+
+    echo "$fnName: $guard $reason" >&2
+
+    # Recall that test commands return 0 for success and 1 for failure.
+    [[ "$guard" == Sourcing ]]
+    return $?
+}
+
+# Guard against calling the hook at the wrong time.
+markForCUDAToolkit_ROOTGuard || return 0
+
+# Make a copy of the current offsets, so that we can use them in information messages; this is necessary because the
+# offsets are not consistently available in the environment during various phases of the build.
+declare -g snapshotHostOffset="${hostOffset:?}"
+declare -g snapshotTargetOffset="${targetOffset:?}"
+
+markForCUDAToolkit_ROOTGetFnName() {
+    local fnName="mark-for-cudatoolkit-root-hook::${1:?}"
+    local hostOffset="${hostOffset:-$snapshotHostOffset}"
+    local targetOffset="${targetOffset:-$snapshotTargetOffset}"
+    echo "$fnName hostOffset=$hostOffset targetOffset=$targetOffset"
+}
+
+markForCUDAToolkit_ROOT() {
+    # Name function never needs to have return value checked.
+    # shellcheck disable=SC2155
+    local fnName="$(markForCUDAToolkit_ROOTGetFnName markForCUDAToolkit_ROOT)"
+    echo "$fnName: Running on ${prefix:?}" >&2
+
+    local markerPath="$prefix/nix-support/include-in-cudatoolkit-root"
+    mkdir -p "$(dirname "$markerPath")"
+    if [[ -f "$markerPath" ]]; then
+        (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: $markerPath exists, skipping" >&2
+        return 0
+    fi
+
+    # Always create the file, even if it's empty, since setup-cuda-hook relies on its existence.
+    # However, only populate it if strictDeps is not set.
+    touch "$markerPath"
+    if [[ -z "${strictDeps-}" ]]; then
+        (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: populating $markerPath" >&2
+        echo "${pname:?}-${output:?}" > "$markerPath"
+    fi
+}
+fixupOutputHooks+=(markForCUDAToolkit_ROOT)
diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook.sh b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook.sh
deleted file mode 100644
index a4a444fcd2417..0000000000000
--- a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook.sh
+++ /dev/null
@@ -1,119 +0,0 @@
-# shellcheck shell=bash
-
-# Only run the hook from nativeBuildInputs
-(( "$hostOffset" == -1 && "$targetOffset" == 0)) || return 0
-
-guard=Sourcing
-reason=
-
-[[ -n ${cudaSetupHookOnce-} ]] && guard=Skipping && reason=" because the hook has been propagated more than once"
-
-if (( "${NIX_DEBUG:-0}" >= 1 )) ; then
-    echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset setupCudaHook$reason" >&2
-else
-    echo "$guard setup-cuda-hook$reason" >&2
-fi
-
-[[ "$guard" = Sourcing ]] || return 0
-
-declare -g cudaSetupHookOnce=1
-declare -Ag cudaHostPathsSeen=()
-declare -Ag cudaOutputToPath=()
-
-extendcudaHostPathsSeen() {
-    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "extendcudaHostPathsSeen $1" >&2
-
-    local markerPath="$1/nix-support/include-in-cudatoolkit-root"
-    [[ ! -f "${markerPath}" ]] && return
-    [[ -v cudaHostPathsSeen[$1] ]] && return
-
-    cudaHostPathsSeen["$1"]=1
-
-    # E.g. cuda_cudart-lib
-    local cudaOutputName
-    read -r cudaOutputName < "$markerPath"
-
-    [[ -z "$cudaOutputName" ]] && return
-
-    local oldPath="${cudaOutputToPath[$cudaOutputName]-}"
-    [[ -n "$oldPath" ]] && echo "extendcudaHostPathsSeen: warning: overwriting $cudaOutputName from $oldPath to $1" >&2
-    cudaOutputToPath["$cudaOutputName"]="$1"
-}
-addEnvHooks "$targetOffset" extendcudaHostPathsSeen
-
-setupCUDAToolkit_ROOT() {
-    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "setupCUDAToolkit_ROOT: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
-
-    for path in "${!cudaHostPathsSeen[@]}" ; do
-        addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path"
-        if [[ -d "$path/include" ]] ; then
-            addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include"
-        fi
-    done
-
-    export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT"
-}
-preConfigureHooks+=(setupCUDAToolkit_ROOT)
-
-setupCUDAToolkitCompilers() {
-    echo Executing setupCUDAToolkitCompilers >&2
-
-    if [[ -n "${dontSetupCUDAToolkitCompilers-}" ]] ; then
-        return
-    fi
-
-    # Point NVCC at a compatible compiler
-
-    # For CMake-based projects:
-    # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
-    # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html
-    # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html
-
-    export cmakeFlags+=" -DCUDA_HOST_COMPILER=@ccFullPath@"
-    export cmakeFlags+=" -DCMAKE_CUDA_HOST_COMPILER=@ccFullPath@"
-
-    # For non-CMake projects:
-    # We prepend --compiler-bindir to nvcc flags.
-    # Downstream packages can override these, because NVCC
-    # uses the last --compiler-bindir it gets on the command line.
-    # FIXME: this results in "incompatible redefinition" warnings.
-    # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
-    if [ -z "${CUDAHOSTCXX-}" ]; then
-      export CUDAHOSTCXX="@ccFullPath@";
-    fi
-
-    export NVCC_PREPEND_FLAGS+=" --compiler-bindir=@ccRoot@/bin"
-
-    # NOTE: We set -Xfatbin=-compress-all, which reduces the size of the compiled
-    #   binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as
-    #   the default set of CUDA capabilities we build can regularly cause this to occur (for
-    #   example, with Magma).
-    #
-    # @SomeoneSerge: original comment was made by @ConnorBaker in .../cudatoolkit/common.nix
-    if [[ -z "${dontCompressFatbin-}" ]]; then
-        export NVCC_PREPEND_FLAGS+=" -Xfatbin=-compress-all"
-    fi
-}
-preConfigureHooks+=(setupCUDAToolkitCompilers)
-
-propagateCudaLibraries() {
-    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "propagateCudaLibraries: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
-
-    [[ -z "${cudaPropagateToOutput-}" ]] && return
-
-    mkdir -p "${!cudaPropagateToOutput}/nix-support"
-    # One'd expect this should be propagated-bulid-build-deps, but that doesn't seem to work
-    echo "@setupCudaHook@" >> "${!cudaPropagateToOutput}/nix-support/propagated-native-build-inputs"
-
-    local propagatedBuildInputs=( "${!cudaHostPathsSeen[@]}" )
-    for output in $(getAllOutputNames) ; do
-        if [[ ! "$output" = "$cudaPropagateToOutput" ]] ; then
-            propagatedBuildInputs+=( "${!output}" )
-        fi
-        break
-    done
-
-    # One'd expect this should be propagated-host-host-deps, but that doesn't seem to work
-    printWords "${propagatedBuildInputs[@]}" >> "${!cudaPropagateToOutput}/nix-support/propagated-build-inputs"
-}
-postFixupHooks+=(propagateCudaLibraries)
diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix
new file mode 100644
index 0000000000000..f36e9339de5ce
--- /dev/null
+++ b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix
@@ -0,0 +1,18 @@
+# Setup hook wiring NVCC to backendStdenv's compiler; propagated by cuda_nvcc or cudatoolkit, rather than used directly
+{ backendStdenv, makeSetupHook }:
+let
+  inherit (backendStdenv) cc;
+in
+makeSetupHook
+  {
+    name = "setup-cuda-hook";
+    substitutions = {
+      # Required in addition to ccRoot as otherwise bin/gcc is looked up
+      # when building CMakeCUDACompilerId.cu
+      ccFullPath = "${cc}/bin/${cc.targetPrefix}c++";
+      # Point NVCC at a compatible compiler (the hook script passes @ccRoot@/bin as --compiler-bindir)
+      ccRoot = "${cc}";
+      setupCudaHook = placeholder "out"; # own output path; the script appends it to propagated-native-build-inputs
+    };
+  }
+  ./setup-cuda-hook.sh
diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh
new file mode 100644
index 0000000000000..694a4b6cf0ef9
--- /dev/null
+++ b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh
@@ -0,0 +1,168 @@
+# shellcheck shell=bash
+
+# Guard helper function
+# Returns 0 (success) if the hook should be run, 1 (failure) otherwise.
+# This allows us to use short-circuit evaluation to avoid running the hook when it shouldn't be.
+setupCudaHookGuard() {
+    local -i hostOffset=${hostOffset:?}
+    local -i targetOffset=${targetOffset:?}
+    local fnName="setup-cuda-hook::setupCudaHookGuard hostOffset=$hostOffset targetOffset=$targetOffset"
+    local guard=Skipping
+    local reason=
+
+    # The hook must be sourced exactly once, and only when it sits in nativeBuildInputs relative to the package
+    # being built (hostOffset=-1, targetOffset=0). The once-check has to come first: the top level of this file
+    # sets cudaSetupHookOnce and resets cudaHostPathsSeen/cudaOutputToPath, so sourcing it a second time would
+    # discard the recorded paths and register every hook function twice.
+    if [[ -n "${cudaSetupHookOnce-}" ]]; then
+        reason="because the hook has been propagated more than once"
+    elif (( hostOffset == -1 && targetOffset == 0 )); then
+        guard=Sourcing
+        reason="because the hook is in nativeBuildInputs relative to the package being built"
+    else
+        reason="because the hook is not in nativeBuildInputs relative to the package being built"
+    fi
+
+    echo "$fnName: $guard $reason" >&2
+
+    # The status of this final test is the function's return value: 0 when sourcing, 1 when skipping.
+    [[ "$guard" == Sourcing ]]
+}
+
+# Guard against calling the hook at the wrong time: stop sourcing this file (successfully) when the guard declines.
+setupCudaHookGuard || return 0
+
+declare -g cudaSetupHookOnce=1 # sentinel read by setupCudaHookGuard
+declare -Ag cudaHostPathsSeen=() # used as a set: keys are paths carrying nix-support/include-in-cudatoolkit-root
+declare -Ag cudaOutputToPath=() # marker-file contents (e.g. cuda_cudart-lib) -> path that provided the marker
+
+# Make a copy of the current offsets, so that we can use them in information messages; this is necessary because the
+# offsets are not consistently available in the environment during various phases of the build.
+declare -g snapshotHostOffset="${hostOffset:?}"
+declare -g snapshotTargetOffset="${targetOffset:?}"
+
+setupCudaHookGetFnName() {
+    # Prints the log prefix "setup-cuda-hook::<name> hostOffset=<h> targetOffset=<t>"; <name> ($1) is mandatory.
+    # Offsets that are unset or empty in the current phase fall back to the snapshots taken at source time.
+    local label="setup-cuda-hook::${1:?}"
+    printf '%s\n' "$label hostOffset=${hostOffset:-$snapshotHostOffset} targetOffset=${targetOffset:-$snapshotTargetOffset}"
+}
+
+extendCudaHostPathsSeen() {
+    # Env hook: records the path given as $1 when it carries the include-in-cudatoolkit-root marker file.
+    # shellcheck disable=SC2155
+    local fnName="$(setupCudaHookGetFnName extendCudaHostPathsSeen)"
+    local markerPath="$1/nix-support/include-in-cudatoolkit-root"
+    (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: checking for existence of $markerPath" >&2
+
+    [[ -f "$markerPath" ]] || {
+        (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: skipping since $markerPath does not exist" >&2
+        return 0
+    }
+
+    [[ ! -v cudaHostPathsSeen["$1"] ]] || {
+        (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: skipping since $1 has already been seen" >&2
+        return 0
+    }
+
+    # First sighting: remember the path as a key of the associative array.
+    cudaHostPathsSeen["$1"]=1
+    (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: added $1 to cudaHostPathsSeen" >&2
+
+    # The file referenced by markerPath is only worth reading when strictDeps is not set; with strictDeps it is
+    # blank, so there is nothing more to do.
+    if [[ -n "${strictDeps-}" ]]; then return 0; fi
+
+    # E.g. cuda_cudart-lib
+    local cudaOutputName
+    # An empty marker file is not an error: it can happen when the package was built with strictDeps set while
+    # the current build does not have strictDeps set.
+    if ! read -r cudaOutputName < "$markerPath"; then return 0; fi
+
+    [[ -n "$cudaOutputName" ]] || return 0
+
+    local oldPath="${cudaOutputToPath[$cudaOutputName]-}"
+    if [[ -n "$oldPath" ]]; then echo "$fnName: warning: overwriting $cudaOutputName from $oldPath to $1" >&2; fi
+    cudaOutputToPath["$cudaOutputName"]="$1"
+}
+addEnvHooks "${targetOffset:?}" extendCudaHostPathsSeen
+
+setupCUDAToolkit_ROOT() {
+    # Pre-configure hook: hands every path in cudaHostPathsSeen to CMake via CUDAToolkit_ROOT / CUDAToolkit_INCLUDE_DIR.
+    # shellcheck disable=SC2155
+    local fnName="$(setupCudaHookGetFnName setupCUDAToolkit_ROOT)"
+    (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
+    local path # keep the loop variable from leaking into the global build environment
+    for path in "${!cudaHostPathsSeen[@]}"; do
+        addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path"
+        [[ -d "$path/include" ]] && addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include"
+    done
+
+    export cmakeFlagsArray+=(
+        -DCUDAToolkit_INCLUDE_DIR="${CUDAToolkit_INCLUDE_DIR:-}"
+        -DCUDAToolkit_ROOT="${CUDAToolkit_ROOT:-}"
+    )
+}
+preConfigureHooks+=(setupCUDAToolkit_ROOT)
+
+setupCUDAToolkitCompilers() {
+    # Pre-configure hook: points NVCC (and CMake's CUDA support) at the compiler substituted for @ccFullPath@/@ccRoot@.
+    # shellcheck disable=SC2155
+    local fnName="$(setupCudaHookGetFnName setupCUDAToolkitCompilers)"
+    echo "$fnName: Running" >&2
+
+    [[ -n "${dontSetupCUDAToolkitCompilers-}" ]] && return 0
+
+    # Point NVCC at a compatible compiler
+
+    # For CMake-based projects:
+    # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
+    # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html
+    # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html
+
+    export cmakeFlagsArray+=(
+        -DCUDA_HOST_COMPILER="@ccFullPath@"
+        -DCMAKE_CUDA_HOST_COMPILER="@ccFullPath@"
+    )
+
+    # For non-CMake projects:
+    # We prepend --compiler-bindir to nvcc flags.
+    # Downstream packages can override these, because NVCC
+    # uses the last --compiler-bindir it gets on the command line.
+    # FIXME: this results in "incompatible redefinition" warnings.
+    # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
+    if [[ -z "${CUDAHOSTCXX-}" ]]; then export CUDAHOSTCXX="@ccFullPath@"; fi
+
+    export NVCC_PREPEND_FLAGS+=" --compiler-bindir=@ccRoot@/bin"
+
+    # NOTE: We set -Xfatbin=-compress-all, which reduces the size of the compiled binaries. If binaries grow over
+    # 2GB, they will fail to link. This is a problem for us, as the default set of CUDA capabilities we build can
+    # regularly cause this to occur (for example, with Magma).
+    #
+    # @SomeoneSerge: original comment was made by @ConnorBaker in .../cudatoolkit/common.nix
+    # Must stay an if-statement: as the last command, `[[ ... ]] && ...` would make the hook return 1 on opt-out.
+    if [[ -z "${dontCompressFatbin-}" ]]; then export NVCC_PREPEND_FLAGS+=" -Xfatbin=-compress-all"; fi
+}
+preConfigureHooks+=(setupCUDAToolkitCompilers)
+
+propagateCudaLibraries() {
+    # Post-fixup hook: if cudaPropagateToOutput names an output, make it propagate this hook and the CUDA paths seen.
+    # shellcheck disable=SC2155
+    local fnName="$(setupCudaHookGetFnName propagateCudaLibraries)"
+    (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: cudaPropagateToOutput=${cudaPropagateToOutput-} cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
+
+    [[ -z "${cudaPropagateToOutput-}" ]] && return 0
+
+    mkdir -p "${!cudaPropagateToOutput}/nix-support"
+    # One'd expect this should be propagated-build-build-deps, but that doesn't seem to work
+    echo "@setupCudaHook@" >> "${!cudaPropagateToOutput}/nix-support/propagated-native-build-inputs"
+    # Besides the seen paths, propagate the first output that is not the target output (the loop stops there).
+    local output propagatedBuildInputs=( "${!cudaHostPathsSeen[@]}" )
+    for output in $(getAllOutputNames); do
+        if [[ "$output" != "$cudaPropagateToOutput" ]]; then propagatedBuildInputs+=( "${!output}" ); break; fi
+    done
+
+    # One'd expect this should be propagated-host-host-deps, but that doesn't seem to work
+    printWords "${propagatedBuildInputs[@]}" >> "${!cudaPropagateToOutput}/nix-support/propagated-build-inputs"
+}
+postFixupHooks+=(propagateCudaLibraries)
diff --git a/pkgs/development/cuda-modules/tensorrt/fixup.nix b/pkgs/development/cuda-modules/tensorrt/fixup.nix
index 51ca3d652bd1a..f632f1b138d96 100644
--- a/pkgs/development/cuda-modules/tensorrt/fixup.nix
+++ b/pkgs/development/cuda-modules/tensorrt/fixup.nix
@@ -1,7 +1,7 @@
 {
+  backendStdenv,
   cudaVersion,
   final,
-  hostPlatform,
   lib,
   mkVersionedPackageName,
   package,
@@ -17,6 +17,9 @@ let
     strings
     versions
     ;
+
+  inherit (backendStdenv) hostPlatform;
+
   # targetArch :: String
   targetArch = attrsets.attrByPath [ hostPlatform.system ] "unsupported" {
     x86_64-linux = "x86_64-linux-gnu";
diff --git a/pkgs/top-level/cuda-packages.nix b/pkgs/top-level/cuda-packages.nix
index 4b8ad4646485e..8191d6035cbb0 100644
--- a/pkgs/top-level/cuda-packages.nix
+++ b/pkgs/top-level/cuda-packages.nix
@@ -21,17 +21,16 @@
 #
 # I've (@connorbaker) attempted to do that, though I'm unsure of how this will interact with overrides.
 {
-  callPackage,
+  config,
   cudaVersion,
+  generateSplicesForMkScope,
   lib,
-  newScope,
+  makeScopeWithSplicing',
   pkgs,
   __attrsFailEvaluation ? true,
 }:
 let
   inherit (lib)
-    attrsets
-    customisation
     fixedPoints
     strings
     versions
@@ -39,13 +38,13 @@ let
   # Backbone
   gpus = builtins.import ../development/cuda-modules/gpus.nix;
   nvccCompatibilities = builtins.import ../development/cuda-modules/nvcc-compatibilities.nix;
-  flags = callPackage ../development/cuda-modules/flags.nix {inherit cudaVersion gpus;};
   passthruFunction =
     final:
     (
       {
         inherit cudaVersion lib pkgs;
-        inherit gpus nvccCompatibilities flags;
+        inherit gpus nvccCompatibilities;
+        flags = final.callPackage ../development/cuda-modules/flags.nix {};
         cudaMajorVersion = versions.major cudaVersion;
         cudaMajorMinorVersion = versions.majorMinor cudaVersion;
         cudaOlder = strings.versionOlder cudaVersion;
@@ -58,7 +57,7 @@ let
         cudaPackages = final;
 
         # TODO(@connorbaker): `cudaFlags` is an alias for `flags` which should be removed in the future.
-        cudaFlags = flags;
+        cudaFlags = final.flags;
 
         # Exposed as cudaPackages.backendStdenv.
         # This is what nvcc uses as a backend,
@@ -86,32 +85,33 @@ let
     ];
 
   composedExtension = fixedPoints.composeManyExtensions [
-    (import ../development/cuda-modules/setup-hooks/extension.nix)
-    (callPackage ../development/cuda-modules/cuda/extension.nix {inherit cudaVersion;})
-    (callPackage ../development/cuda-modules/cuda/overrides.nix {inherit cudaVersion;})
-    (callPackage ../development/cuda-modules/generic-builders/multiplex.nix {
-      inherit cudaVersion flags mkVersionedPackageName;
-      pname = "cudnn";
-      releasesModule = ../development/cuda-modules/cudnn/releases.nix;
-      shimsFn = ../development/cuda-modules/cudnn/shims.nix;
-      fixupFn = ../development/cuda-modules/cudnn/fixup.nix;
-    })
-    (callPackage ../development/cuda-modules/cutensor/extension.nix {
-      inherit cudaVersion flags mkVersionedPackageName;
-    })
-    (callPackage ../development/cuda-modules/generic-builders/multiplex.nix {
-      inherit cudaVersion flags mkVersionedPackageName;
-      pname = "tensorrt";
-      releasesModule = ../development/cuda-modules/tensorrt/releases.nix;
-      shimsFn = ../development/cuda-modules/tensorrt/shims.nix;
-      fixupFn = ../development/cuda-modules/tensorrt/fixup.nix;
-    })
-    (callPackage ../development/cuda-modules/cuda-samples/extension.nix {inherit cudaVersion;})
-    (callPackage ../development/cuda-modules/cuda-library-samples/extension.nix {})
+    (builtins.import ../development/cuda-modules/setup-hooks/extension.nix)
+    (builtins.import ../development/cuda-modules/cuda/extension.nix {inherit cudaVersion lib;})
+    (builtins.import ../development/cuda-modules/cuda/overrides.nix)
+    # (callPackage ../development/cuda-modules/generic-builders/multiplex.nix {
+    #   inherit cudaVersion flags mkVersionedPackageName;
+    #   pname = "cudnn";
+    #   releasesModule = ../development/cuda-modules/cudnn/releases.nix;
+    #   shimsFn = ../development/cuda-modules/cudnn/shims.nix;
+    #   fixupFn = ../development/cuda-modules/cudnn/fixup.nix;
+    # })
+    # (callPackage ../development/cuda-modules/cutensor/extension.nix {
+    #   inherit cudaVersion flags mkVersionedPackageName;
+    # })
+    # (callPackage ../development/cuda-modules/generic-builders/multiplex.nix {
+    #   inherit cudaVersion flags mkVersionedPackageName;
+    #   pname = "tensorrt";
+    #   releasesModule = ../development/cuda-modules/tensorrt/releases.nix;
+    #   shimsFn = ../development/cuda-modules/tensorrt/shims.nix;
+    #   fixupFn = ../development/cuda-modules/tensorrt/fixup.nix;
+    # })
+    # (callPackage ../development/cuda-modules/cuda-samples/extension.nix {inherit cudaVersion;})
+    # (callPackage ../development/cuda-modules/cuda-library-samples/extension.nix {})
   ];
 
-  cudaPackages = customisation.makeScope newScope (
-    fixedPoints.extends composedExtension passthruFunction
-  );
+  cudaPackages = makeScopeWithSplicing' {
+    otherSplices = generateSplicesForMkScope "cudaPackages";
+    f = fixedPoints.extends composedExtension passthruFunction;
+  };
 in
 cudaPackages // { inherit __attrsFailEvaluation; }