diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix
index d75d288f5577e..016675fa07015 100644
--- a/pkgs/development/compilers/cudatoolkit/extension.nix
+++ b/pkgs/development/compilers/cudatoolkit/extension.nix
@@ -47,35 +47,21 @@ final: prev: let
         ./hooks/mark-for-cudatoolkit-root-hook.sh)
     { });
 
-  # Normally propagated by cuda_nvcc or cudatoolkit through their depsHostHostPropagated
+  # Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly
   setupCudaHook = (final.callPackage
     ({ makeSetupHook, backendStdenv }:
       makeSetupHook
         {
           name = "setup-cuda-hook";
 
+          substitutions.setupCudaHook = placeholder "out";
+
           # Point NVCC at a compatible compiler
           substitutions.ccRoot = "${backendStdenv.cc}";
 
           # Required in addition to ccRoot as otherwise bin/gcc is looked up
           # when building CMakeCUDACompilerId.cu
           substitutions.ccFullPath = "${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++";
-
-          # Required by cmake's enable_language(CUDA) to build a test program
-          # When implementing cross-compilation support: this is
-          # final.pkgs.targetPackages.cudaPackages.cuda_cudart
-          # Given the multiple-outputs each CUDA redist has, we can specify the exact components we
-          # need from the package. CMake requires:
-          # - the cuda_runtime.h header, which is in the dev output
-          # - the dynamic library, which is in the lib output
-          # - the static library, which is in the static output
-          substitutions.cudartFlags = let cudart = final.cuda_cudart; in
-            builtins.concatStringsSep " " (final.lib.optionals (final ? cuda_cudart) ([
-              "-I${final.lib.getDev cudart}/include"
-              "-L${final.lib.getLib cudart}/lib"
-            ] ++ final.lib.optionals (builtins.elem "static" cudart.outputs) [
-              "-L${cudart.static}/lib"
-            ]));
         }
         ./hooks/setup-cuda-hook.sh)
     { });
diff --git a/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh
index 5c18760a3a2b0..ba04c2e0806af 100644
--- a/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh
+++ b/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh
@@ -1,8 +1,14 @@
 # shellcheck shell=bash
 
+# Should we mimic cc-wrapper's "hygiene"?
+[[ -z ${strictDeps-} ]] || (( "$hostOffset" < 0 )) || return 0
+
+echo "Sourcing mark-for-cudatoolkit-root-hook" >&2
+
 markForCUDAToolkit_ROOT() {
     mkdir -p "${prefix}/nix-support"
-    touch "${prefix}/nix-support/include-in-cudatoolkit-root"
+    [[ -f "${prefix}/nix-support/include-in-cudatoolkit-root" ]] && return
+    echo "$pname-$output" > "${prefix}/nix-support/include-in-cudatoolkit-root"
 }
 
 fixupOutputHooks+=(markForCUDAToolkit_ROOT)
diff --git a/pkgs/development/compilers/cudatoolkit/hooks/nvcc-setup-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/nvcc-setup-hook.sh
deleted file mode 100644
index e75a84a9550e7..0000000000000
--- a/pkgs/development/compilers/cudatoolkit/hooks/nvcc-setup-hook.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-# shellcheck shell=bash
-
-# CMake's enable_language(CUDA) runs a compiler test and it doesn't account for
-# CUDAToolkit_ROOT. We have to help it locate libcudart
-export NVCC_APPEND_FLAGS+=" -L@cudartLib@/lib -L@cudartStatic@/lib -I@cudartInclude@/include"
diff --git a/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh
index 0fa8883081c50..7b7b3bdde80e3 100644
--- a/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh
+++ b/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh
@@ -3,19 +3,57 @@
 # Only run the hook from nativeBuildInputs
 (( "$hostOffset" == -1 && "$targetOffset" == 0)) || return 0
 
-echo Sourcing setup-cuda-hook >&2
+guard=Sourcing
+reason=
 
-extendCUDAToolkit_ROOT() {
-    if [[ -f "$1/nix-support/include-in-cudatoolkit-root" ]] ; then
-        addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$1"
+[[ -n ${cudaSetupHookOnce-} ]] && guard=Skipping && reason=" because the hook has been propagated more than once"
 
-        if [[ -d "$1/include" ]] ; then
-            addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$1/include"
-        fi
-    fi
+if (( "${NIX_DEBUG:-0}" >= 1 )) ; then
+    echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset setupCudaHook$reason" >&2
+else
+    echo "$guard setup-cuda-hook$reason" >&2
+fi
+
+[[ "$guard" = Sourcing ]] || return 0
+
+declare -g cudaSetupHookOnce=1
+declare -Ag cudaHostPathsSeen=()
+declare -Ag cudaOutputToPath=()
+
+extendcudaHostPathsSeen() {
+    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "extendcudaHostPathsSeen $1" >&2
+    # Registered through addEnvHooks; records path $1 when it ships the CUDA marker file.
+    local markerPath="$1/nix-support/include-in-cudatoolkit-root"
+    [[ ! -f "${markerPath}" ]] && return
+    [[ -v cudaHostPathsSeen[$1] ]] && return
+
+    cudaHostPathsSeen["$1"]=1
+
+    # The marker's first line names the package output, e.g. cuda_cudart-lib
+    local cudaOutputName=""
+    # `read` returns non-zero on an empty or newline-less marker; tolerate that so the check below decides
+    read -r cudaOutputName < "$markerPath" || true
+    [[ -z "$cudaOutputName" ]] && return
+
+    local oldPath="${cudaOutputToPath[$cudaOutputName]-}"
+    [[ -n "$oldPath" ]] && echo "extendcudaHostPathsSeen: warning: overwriting $cudaOutputName from $oldPath to $1" >&2
+    cudaOutputToPath["$cudaOutputName"]="$1"
 }
+addEnvHooks "$targetOffset" extendcudaHostPathsSeen
+
+setupCUDAToolkit_ROOT() {
+    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "setupCUDAToolkit_ROOT: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
 
-addEnvHooks "$targetOffset" extendCUDAToolkit_ROOT
+    for path in "${!cudaHostPathsSeen[@]}" ; do
+        addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path"
+        if [[ -d "$path/include" ]] ; then
+            addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include"
+        fi
+    done
+    # Hand the collected paths to CMake; ${var:+...} drops a flag whose variable is unset or empty (safe under `set -u`)
+    export cmakeFlags+="${CUDAToolkit_INCLUDE_DIR:+ -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR}${CUDAToolkit_ROOT:+ -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT}"
+}
+preConfigureHooks+=(setupCUDAToolkit_ROOT)
 
 setupCUDAToolkitCompilers() {
     echo Executing setupCUDAToolkitCompilers >&2
@@ -58,15 +96,44 @@ setupCUDAToolkitCompilers() {
 
     # CMake's enable_language(CUDA) runs a compiler test and it doesn't account for
     # CUDAToolkit_ROOT. We have to help it locate libcudart
-    local cudartFlags="@cudartFlags@"
-    if [[ -z "${nvccDontPrependCudartFlags-}" ]] && [[ -n "${cudartFlags:-}" ]] ; then
-        export NVCC_APPEND_FLAGS+=" $cudartFlags"
+    if [[ -z "${nvccDontPrependCudartFlags-}" ]] ; then
+        if [[ ! -v cudaOutputToPath["cuda_cudart-out"] ]] ; then
+            echo "setupCUDAToolkitCompilers: missing cudaPackages.cuda_cudart. This may become an an error in the future" >&2
+            # exit 1
+        fi
+        for pkg in "${!cudaOutputToPath[@]}" ; do
+            [[ ! "$pkg" = cuda_cudart* ]] && continue
+
+            local path="${cudaOutputToPath[$pkg]}"
+            if [[ -d "$path/include" ]] ; then
+                export NVCC_PREPEND_FLAGS+=" -I$path/include"
+            fi
+            if [[ -d "$path/lib" ]] ; then
+                export NVCC_PREPEND_FLAGS+=" -L$path/lib"
+            fi
+        done
     fi
 }
+preConfigureHooks+=(setupCUDAToolkitCompilers)
 
-setupCMakeCUDAToolkit_ROOT() {
-    export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT"
-}
+propagateCudaLibraries() {
+    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "propagateCudaLibraries: cudaPropagateToOutput=${cudaPropagateToOutput-} cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
 
-postHooks+=(setupCUDAToolkitCompilers)
-preConfigureHooks+=(setupCMakeCUDAToolkit_ROOT)
+    [[ -z "${cudaPropagateToOutput-}" ]] && return
+    # Opt-in: cudaPropagateToOutput names the output whose nix-support files receive the hook and the seen CUDA paths
+    mkdir -p "${!cudaPropagateToOutput}/nix-support"
+    # One would expect this to be propagated-build-build-deps, but that doesn't seem to work
+    echo "@setupCudaHook@" >> "${!cudaPropagateToOutput}/nix-support/propagated-native-build-inputs"
+
+    local output propagatedBuildInputs=( "${!cudaHostPathsSeen[@]}" )
+    for output in $(getAllOutputNames) ; do
+        if [[ ! "$output" = "$cudaPropagateToOutput" ]] ; then
+            propagatedBuildInputs+=( "${!output}" )
+        fi
+        break
+    done
+    # NOTE(review): the loop above breaks after its first iteration, so only the first output is considered — confirm intended
+    # One'd expect this should be propagated-host-host-deps, but that doesn't seem to work
+    printWords "${propagatedBuildInputs[@]}" >> "${!cudaPropagateToOutput}/nix-support/propagated-build-inputs"
+}
+postFixupHooks+=(propagateCudaLibraries)
diff --git a/pkgs/development/libraries/cctag/default.nix b/pkgs/development/libraries/cctag/default.nix
index 2c1a5f9ae7863..238821b6af914 100644
--- a/pkgs/development/libraries/cctag/default.nix
+++ b/pkgs/development/libraries/cctag/default.nix
@@ -49,7 +49,7 @@ stdenv.mkDerivation rec {
   buildInputs = [
     boost179
     eigen
-    opencv
+    opencv.cxxdev
   ];
 
   # Tests are broken on Darwin (linking issue)
diff --git a/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix b/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix
index a82fa9068c66c..2914d059cfaff 100644
--- a/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix
+++ b/pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix
@@ -18,6 +18,11 @@ stdenv.mkDerivation {
     cp -R * $out/include
   '';
 
+  postFixup = ''
+    mkdir -p $out/nix-support
+    echo $pname >> "$out/nix-support/include-in-cudatoolkit-root"
+  '';
+
   meta = with lib; {
     description = "Nvidia optical flow headers for computing the relative motion of pixels between images";
     homepage = "https://developer.nvidia.com/opticalflow-sdk";
diff --git a/pkgs/development/libraries/opencv/4.x.nix b/pkgs/development/libraries/opencv/4.x.nix
index 4c1b13d1309e0..023e56940b75c 100644
--- a/pkgs/development/libraries/opencv/4.x.nix
+++ b/pkgs/development/libraries/opencv/4.x.nix
@@ -247,8 +247,10 @@ effectiveStdenv.mkDerivation {
 
   outputs = [
     "out"
+    "cxxdev"
     "package_tests"
   ];
+  cudaPropagateToOutput = "cxxdev";
 
   postUnpack = lib.optionalString buildContrib ''
     cp --no-preserve=mode -r "${contribSrc}/modules" "$NIX_BUILD_TOP/source/opencv_contrib"
@@ -328,20 +330,32 @@ effectiveStdenv.mkDerivation {
       bzip2 AVFoundation Cocoa VideoDecodeAcceleration CoreMedia MediaToolbox Accelerate
     ]
     ++ lib.optionals enableDocs [ doxygen graphviz-nox ]
-    ++ lib.optionals enableCuda  (with cudaPackages; [
-      cuda_cudart
-      cuda_cccl # <thrust/*>
-      libnpp # npp.h
+    ++ lib.optionals enableCuda (with cudaPackages; [
+      cuda_cudart.lib
+      cuda_cudart.dev
+      cuda_cccl.dev # <thrust/*>
+      libnpp.dev # npp.h
+      libnpp.lib
+      libnpp.static
+      nvidia-optical-flow-sdk
     ] ++ lib.optionals enableCublas [
-      libcublas # cublas_v2.h
+      # May start using the default $out instead once
+      # https://github.com/NixOS/nixpkgs/issues/271792
+      # has been addressed
+      libcublas.static
+      libcublas.lib
+      libcublas.dev # cublas_v2.h
     ] ++ lib.optionals enableCudnn [
-      cudnn # cudnn.h
+      cudnn.dev # cudnn.h
+      cudnn.lib
+      cudnn.static
     ] ++ lib.optionals enableCufft [
-      libcufft # cufft.h
-  ]);
+      libcufft.dev # cufft.h
+      libcufft.lib
+      libcufft.static
+    ]);
 
-  propagatedBuildInputs = lib.optional enablePython pythonPackages.numpy
-    ++ lib.optionals enableCuda [ nvidia-optical-flow-sdk ];
+  propagatedBuildInputs = lib.optionals enablePython [ pythonPackages.numpy ];
 
   nativeBuildInputs = [ cmake pkg-config unzip ]
   ++ lib.optionals enablePython [
@@ -458,6 +472,7 @@ effectiveStdenv.mkDerivation {
   postInstall = ''
     sed -i "s|{exec_prefix}/$out|{exec_prefix}|;s|{prefix}/$out|{prefix}|" \
       "$out/lib/pkgconfig/opencv4.pc"
+    mkdir $cxxdev
   ''
   # install python distribution information, so other packages can `import opencv`
   + lib.optionalString enablePython ''
diff --git a/pkgs/development/libraries/openvino/default.nix b/pkgs/development/libraries/openvino/default.nix
index 5761f9e7bb645..6ff2be8ddbd6e 100644
--- a/pkgs/development/libraries/openvino/default.nix
+++ b/pkgs/development/libraries/openvino/default.nix
@@ -122,6 +122,7 @@ stdenv.mkDerivation rec {
     "-DENABLE_CPPLINT:BOOL=OFF"
     "-DBUILD_TESTING:BOOL=OFF"
     "-DENABLE_SAMPLES:BOOL=OFF"
+    (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
   ];
 
   env.NIX_CFLAGS_COMPILE = lib.optionalString stdenv.isAarch64 "-Wno-narrowing";
@@ -133,7 +134,7 @@ stdenv.mkDerivation rec {
   buildInputs = [
     libusb1
     libxml2
-    opencv
+    opencv.cxxdev
     protobuf
     pugixml
     tbb
diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix
index b648e2d7bf360..8ff396cf9cc3a 100644
--- a/pkgs/development/python-modules/torch/default.nix
+++ b/pkgs/development/python-modules/torch/default.nix
@@ -134,7 +134,9 @@ in buildPythonPackage rec {
     "out" # output standard python package
     "dev" # output libtorch headers
     "lib" # output libtorch libraries
+    "cxxdev" # propagated deps for the cmake consumers of torch
   ];
+  cudaPropagateToOutput = "cxxdev";
 
   src = fetchFromGitHub {
     owner = "pytorch";
@@ -339,6 +341,7 @@ in buildPythonPackage rec {
       cuda_cccl.dev # <thrust/*>
       cuda_cudart.dev # cuda_runtime.h and libraries
       cuda_cudart.lib
+      cuda_cudart.static
       cuda_cupti.dev # For kineto
       cuda_cupti.lib # For kineto
       cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too
@@ -371,7 +374,10 @@ in buildPythonPackage rec {
     ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ]
     ++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ]
     ++ lib.optionals stdenv.isLinux [ numactl ]
-    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];
+    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ]
+    ++ lib.optionals tritonSupport [ openai-triton ]
+    ++ lib.optionals MPISupport [ mpi ]
+    ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];
 
   propagatedBuildInputs = [
     cffi
@@ -392,8 +398,10 @@ in buildPythonPackage rec {
 
     # torch/csrc requires `pybind11` at runtime
     pybind11
+  ] ++ lib.optionals tritonSupport [ openai-triton ];
+
+  propagatedCxxBuildInputs = [
   ]
-  ++ lib.optionals tritonSupport [ openai-triton ]
   ++ lib.optionals MPISupport [ mpi ]
   ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];
 
@@ -454,7 +462,10 @@ in buildPythonPackage rec {
       --replace "/build/source/torch/include" "$dev/include"
   '';
 
-  postFixup = lib.optionalString stdenv.isDarwin ''
+  postFixup = ''
+    mkdir -p "$cxxdev/nix-support"
+    printWords "''${propagatedCxxBuildInputs[@]}" >> "$cxxdev/nix-support/propagated-build-inputs"
+  '' + lib.optionalString stdenv.isDarwin ''
     for f in $(ls $lib/lib/*.dylib); do
         install_name_tool -id $lib/lib/$(basename $f) $f || true
     done
diff --git a/pkgs/development/python-modules/torchaudio/default.nix b/pkgs/development/python-modules/torchaudio/default.nix
index 1014ab523821a..5e6f295cbd2b1 100644
--- a/pkgs/development/python-modules/torchaudio/default.nix
+++ b/pkgs/development/python-modules/torchaudio/default.nix
@@ -60,17 +60,7 @@ buildPythonPackage rec {
     ffmpeg-full
     pybind11
     sox
-  ] ++ lib.optionals cudaSupport [
-    cudaPackages.libcurand.dev
-    cudaPackages.libcurand.lib
-    cudaPackages.cuda_cudart # cuda_runtime.h and libraries
-    cudaPackages.cuda_cccl.dev # <thrust/*>
-    cudaPackages.cuda_nvtx.dev
-    cudaPackages.cuda_nvtx.lib # -llibNVToolsExt
-    cudaPackages.libcublas.dev
-    cudaPackages.libcublas.lib
-    cudaPackages.libcufft.dev
-    cudaPackages.libcufft.lib
+    torch.cxxdev
   ];
 
   propagatedBuildInputs = [
diff --git a/pkgs/development/python-modules/torchvision/default.nix b/pkgs/development/python-modules/torchvision/default.nix
index 223130918af59..c52d058fb94e9 100644
--- a/pkgs/development/python-modules/torchvision/default.nix
+++ b/pkgs/development/python-modules/torchvision/default.nix
@@ -17,28 +17,6 @@ let
   inherit (torch) cudaCapabilities cudaPackages cudaSupport;
   inherit (cudaPackages) backendStdenv cudaVersion;
 
-  # NOTE: torchvision doesn't use cudnn; torch does!
-  #   For this reason it is not included.
-  cuda-common-redist = with cudaPackages; [
-    cuda_cccl # <thrust/*>
-    libcublas # cublas_v2.h
-    libcusolver # cusolverDn.h
-    libcusparse # cusparse.h
-  ];
-
-  cuda-native-redist = symlinkJoin {
-    name = "cuda-native-redist-${cudaVersion}";
-    paths = with cudaPackages; [
-      cuda_cudart # cuda_runtime.h
-      cuda_nvcc
-    ] ++ cuda-common-redist;
-  };
-
-  cuda-redist = symlinkJoin {
-    name = "cuda-redist-${cudaVersion}";
-    paths = cuda-common-redist;
-  };
-
   pname = "torchvision";
   version = "0.16.1";
 in
@@ -52,9 +30,15 @@ buildPythonPackage {
     hash = "sha256-TsYBDtedTQ3+F3LM4JwzkGH2XOr0WSp1Au5YoR07rSA=";
   };
 
-  nativeBuildInputs = [ libpng ninja which ] ++ lib.optionals cudaSupport [ cuda-native-redist ];
+  nativeBuildInputs = [
+    libpng
+    ninja
+    which
+  ] ++ lib.optionals cudaSupport [
+    cudaPackages.cuda_nvcc
+  ];
 
-  buildInputs = [ libjpeg_turbo libpng ] ++ lib.optionals cudaSupport [ cuda-redist ];
+  buildInputs = [ libjpeg_turbo libpng torch.cxxdev ];
 
   propagatedBuildInputs = [ numpy pillow torch scipy ];