diff --git a/.ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse.yaml
similarity index 100%
rename from .ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse.yaml
rename to .ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse.yaml
diff --git a/.ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml
similarity index 100%
rename from .ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml
rename to .ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml
diff --git a/.ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse.yaml
similarity index 100%
rename from .ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse.yaml
rename to .ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse.yaml
diff --git a/.ci_support/linux_aarch64_c_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse.yaml b/.ci_support/linux_aarch64_arm_variant_typetegrac_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse.yaml
similarity index 100%
rename from .ci_support/linux_aarch64_c_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse.yaml
rename to .ci_support/linux_aarch64_arm_variant_typetegrac_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse.yaml
diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml
index d2bd4382..23b5673c 100644
--- a/.github/workflows/conda-build.yml
+++ b/.github/workflows/conda-build.yml
@@ -70,7 +70,7 @@ jobs:
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             tools_install_dir: ~/miniforge3
             build_workspace_dir: build_artifacts
-          - CONFIG: linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse
+          - CONFIG: linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse
             STORE_BUILD_ARTIFACTS: False
             UPLOAD_PACKAGES: True
             os: ubuntu
@@ -78,7 +78,7 @@ jobs:
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             tools_install_dir: ~/miniforge3
             build_workspace_dir: build_artifacts
-          - CONFIG: linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse
+          - CONFIG: linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse
             STORE_BUILD_ARTIFACTS: False
             UPLOAD_PACKAGES: True
             os: ubuntu
@@ -86,7 +86,7 @@ jobs:
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             tools_install_dir: ~/miniforge3
             build_workspace_dir: build_artifacts
-          - CONFIG: linux_aarch64_c_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse
+          - CONFIG: linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse
             STORE_BUILD_ARTIFACTS: False
             UPLOAD_PACKAGES: True
             os: ubuntu
@@ -94,7 +94,7 @@ jobs:
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
             tools_install_dir: ~/miniforge3
             build_workspace_dir: build_artifacts
-          - CONFIG: linux_aarch64_c_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse
+          - CONFIG: linux_aarch64_arm_variant_typetegrac_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse
             STORE_BUILD_ARTIFACTS: False
             UPLOAD_PACKAGES: True
             os: ubuntu
diff --git a/recipe/build.sh b/recipe/build.sh
index 00660af3..33d51ba4 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -16,6 +16,11 @@ export PACKAGE_TYPE=conda
 # remove pyproject.toml to avoid installing deps from pip
 rm -rf pyproject.toml
 
+# Remove the runtime pin for setuptools; upstream added it to work around
+# breakage from transitive dependencies using pkg_resources. We can handle
+# these dependencies directly in conda-forge.
+sed -i -e '/setuptools<82/d' setup.py
+
 # uncomment to debug cmake build
 # export CMAKE_VERBOSE_MAKEFILE=1
 
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 8aeca8a0..e135d5e9 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,6 +1,6 @@
-# if you wish to build release candidate number X, append the version string with ".rcX"
-{% set version = "2.10.0" %}
-{% set build = 4 %}
+# if you wish to build release candidate number X, append "-rcX" to the version string
+{% set version = "2.11.0" %}
+{% set build = 0 %}
 
 # Use a higher build number for the CUDA variant, to ensure that it's
 # preferred by conda's solver, and it's preferentially
@@ -25,11 +25,11 @@ package:
 source:
 {% if "rc" in version %}
   - url: https://download.pytorch.org/source_code/test/pytorch-v{{ version }}.tar.gz
-    sha256: f35b2d7839b284410e5be9ec2eeb7a3049e09c1b8f6a871d3f2cad495d93dcd6
+    sha256: 3035931fff5b79e0300db69b0249e0c9c7ea5b394f451cb80e0280cbc2affcc2
 {% else %}
   # The "pytorch-v" tarballs contain submodules; the "pytorch-" ones don't.
   - url: https://github.com/pytorch/pytorch/releases/download/v{{ version }}/pytorch-v{{ version }}.tar.gz
-    sha256: fa8ccbe87f83f48735505371c1c313b4aa6db400b0ae4f8a02844d1e150c695f
+    sha256: ab3fde9e7e382f45ac942be6ea2c2ef362c5ccd6f55ed6d5f35e6ea81d3ab88e
 {% endif %}
     patches:
       - patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch
@@ -50,12 +50,11 @@ source:
       # backport https://github.com/pytorch/pytorch/pull/166824
       - patches/0011-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch
       - patches/0012-Fix-building-kineto-against-system-fmt.patch
-      # backport https://github.com/pytorch/pytorch/pull/159828
-      - patches/0013-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
       - patches/0014-Use-Intel-LLVM-openmp.patch
-      # backport https://github.com/pytorch/pytorch/pull/174647
-      - patches/0015-Fix-ICE-in-GCC-14-with-arm.patch
       - patches/0016-for-win-CUDA-remove-USE_CUDA-guard-for-skip-in-compi.patch         # [cuda_compiler_version != "None"]
+      # backport https://github.com/pytorch/pytorch/pull/175283
+      - patches/0017-Ensure-test_tensorinv-uses-well-conditioned-inputs-1.patch
+      - patches/0018-Declare-_tryToInferTypeImpl-with-TORCH_PYTHON_API.patch
       - patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch
 
 build:
@@ -132,13 +131,14 @@ requirements:
     - libcusparse-dev
     - libmagma-devel
     - nccl                  # [linux]
+    - nccl <2.29            # [linux and cuda_compiler_version=="12.9" and arm_variant_type!="tegra"]
     - nvtx-c
     {% endif %}
     # other requirements
     - python 3.12
     - numpy *
     - pip
-    - setuptools
+    - setuptools <82
     - pyyaml
     - requests
     - six
@@ -278,13 +278,14 @@ outputs:
         - libcusparse-dev
         - libmagma-devel
         - nccl                  # [linux]
+        - nccl <2.29            # [linux and cuda_compiler_version=="12.9" and arm_variant_type!="tegra"]
         - nvtx-c
         {% endif %}
         # other requirements
         - python
         - numpy
         - pip
-        - setuptools
+        - setuptools <82
         - pyyaml
         - requests
         - six
@@ -325,11 +326,9 @@ outputs:
         - pybind11
         # https://github.com/pytorch/pytorch/pull/175115
         - pybind11 <3.0.2
-        - setuptools
+        - setuptools <82
         - sympy >=1.13.3
         - typing_extensions >=4.10.0
-        # see https://github.com/conda-forge/sympy-feedstock/issues/67
-        - mpmath <1.4
       run_constrained:
         # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/65
         - pytorch-cpu {{ version }}    # [cuda_compiler_version == "None"]
@@ -517,6 +516,23 @@ outputs:
         {% set skips = skips ~ " or test_qengine" %}                                    # [osx and arm64]
         # flaky failure on osx
         {% set skips = skips ~ " or test_LayerNorm_numeric_mps" %}                      # [osx and arm64]
+        # precision errors on osx-arm64 (mostly MPS tests)
+        {% set skips = skips ~ " or test_forward_nn_Linear" %}                          # [osx and arm64]
+        {% set skips = skips ~ " or test_forward_nn_TransformerEncoderLayer_train_mode_mps_float16" %}  # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_GRUCell_mps" %}      # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_GRU_eval_mode_mps" %}  # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_GRU_train_mode_mps" %}  # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_LSTMCell_mps" %}     # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_Linear_mps" %}       # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_MultiheadAttention_eval_mode_mps_float16" %}  # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_MultiheadAttention_train_mode_mps_float16" %}  # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_RNNCell_mps" %}      # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_RNN_eval_mode_mps" %}  # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_RNN_train_mode_mps" %}  # [osx and arm64]
+        {% set skips = skips ~ " or test_transformerencoderlayer_mps_float32" %}        # [osx and arm64]
+        {% set skips = skips ~ " or test_transformerencoderlayer_gelu_mps_float32" %}   # [osx and arm64]
+        {% set skips = skips ~ " or test_grad_nn_MultiheadAttention_eval_mode_cpu_float64" %}   # [osx and arm64]
+        {% set skips = skips ~ " or test_non_contiguous_tensors_nn_CrossEntropyLoss_mps_float32" %}   # [osx and arm64]
         # some warning-related failure, maybe it's broken by --disable-warnings?
         {% set skips = skips ~ " or test_cpp_warnings_have_python_context_cpu" %}
         {% set skips = skips ~ " or test_cpp_warnings_have_python_context_cuda" %}
@@ -533,15 +549,15 @@ outputs:
         # disable hypothesis because it randomly yields health check errors
 
         # the opengpu server has a card with sm_70, an architecture dropped by CUDA 13.0
-        {% if (cuda_compiler_version or "0").split(".")[0]|int < 13 %}
-        - pytest {{ jobs }} {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50 --timeout=1200 --disable-warnings    # [not aarch64 or py==312]
-        {% endif %}
+        - set ONEDNN_VERBOSE=all       # [win]
+        - export ONEDNN_VERBOSE=all    # [unix]
+        - pytest -v {{ jobs }} {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50 --timeout=1200 --disable-warnings --force-short-summary    # [not aarch64 or py==312]
 
         # regression test for https://github.com/conda-forge/pytorch-cpu-feedstock/issues/329, where we picked up
         # duplicate `.pyc` files due to newest py-ver (3.13) in the build environment not matching the one in host;
         # obviously this test can only be done for other python versions.
-        - test ! -f $SP_DIR/functorch/__pycache__/__init__.cpython-313.pyc          # [py!=313 and unix]
-        - if exist %SP_DIR%\functorch\__pycache__\__init__.cpython-313.pyc exit 1   # [py!=313 and win]
+        - test ! -f $SP_DIR/functorch/__pycache__/__init__.cpython-314.pyc          # [py!=314 and unix]
+        - if exist %SP_DIR%\functorch\__pycache__\__init__.cpython-314.pyc exit 1   # [py!=314 and win]
 
         # test integrity of CMake metadata and ensure that THPLayoutType is visible as a symbol from libtorch_python
         - cd cmake_test
diff --git a/recipe/patches/0005-use-our-own-PREFIX-for-include-paths-etc.patch b/recipe/patches/0005-use-our-own-PREFIX-for-include-paths-etc.patch
index 18ca639b..69e10477 100644
--- a/recipe/patches/0005-use-our-own-PREFIX-for-include-paths-etc.patch
+++ b/recipe/patches/0005-use-our-own-PREFIX-for-include-paths-etc.patch
@@ -1,7 +1,7 @@
-From 44659d96e67c15c4b626ae98046898f0de47d93b Mon Sep 17 00:00:00 2001
+From 97ef248e4150e8cace5f21c8f8fa9eb87f768ff0 Mon Sep 17 00:00:00 2001
 From: "H. Vetinari" <h.vetinari@gmx.com>
 Date: Thu, 23 Jan 2025 22:58:14 +1100
-Subject: [PATCH 05/16] use our own PREFIX for include paths etc.
+Subject: [PATCH 05/13] use our own PREFIX for include paths etc.
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
@@ -33,39 +33,40 @@ Co-Authored-By: Daniel Petry <dpetry@anaconda.com>
 Co-Authored-By: Michał Górny <mgorny@quansight.com>
 Co-Authored-By: Tobias Fischer <info@tobiasfischer.info>
 ---
- cmake/TorchConfig.cmake.in     |  7 ++++---
+ cmake/TorchConfig.cmake.in     | 13 ++++++++++---
  torch/_inductor/cpp_builder.py |  4 +++-
  torch/utils/cpp_extension.py   | 34 ++++++++++++++++++++--------------
- 3 files changed, 27 insertions(+), 18 deletions(-)
+ 3 files changed, 33 insertions(+), 18 deletions(-)
 
 diff --git a/cmake/TorchConfig.cmake.in b/cmake/TorchConfig.cmake.in
-index 0b32ffa99ce..dcce7b38015 100644
+index abf5c814911..475dc3b88ce 100644
 --- a/cmake/TorchConfig.cmake.in
 +++ b/cmake/TorchConfig.cmake.in
-@@ -53,14 +53,15 @@ else()
+@@ -53,9 +53,16 @@ else()
  endif()
  
  # Include directories.
--if(EXISTS "${TORCH_INSTALL_PREFIX}/include")
+-set(TORCH_INCLUDE_DIRS
+-  ${TORCH_INSTALL_PREFIX}/include
+-  ${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include)
 +if(EXISTS "${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include")
 +  # top-level include directory
-   set(TORCH_INCLUDE_DIRS
--    ${TORCH_INSTALL_PREFIX}/include
-     ${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include)
- else()
++  set(TORCH_INCLUDE_DIRS
++    ${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include)
++else()
 +  # site-packages include directory
-   set(TORCH_INCLUDE_DIRS
-     ${TORCH_INSTALL_PREFIX}/include
--    ${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include)
++  set(TORCH_INCLUDE_DIRS
++    ${TORCH_INSTALL_PREFIX}/include
 +    ${TORCH_INSTALL_PREFIX}/../../../../include/torch/csrc/api/include)
- endif()
++endif()
  
  # Library dependencies.
+ if(@BUILD_SHARED_LIBS@)
 diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py
-index 6a6b7d15ae3..0a4724e5c17 100644
+index 6dd6e0d2b5c..fbfa3175836 100644
 --- a/torch/_inductor/cpp_builder.py
 +++ b/torch/_inductor/cpp_builder.py
-@@ -1520,10 +1520,12 @@ def get_cpp_torch_options(
+@@ -1522,10 +1522,12 @@ def get_cpp_torch_options(
          + python_include_dirs
          + torch_include_dirs
          + omp_include_dir_paths
@@ -80,10 +81,10 @@ index 6a6b7d15ae3..0a4724e5c17 100644
      passthrough_args = (
          sys_libs_passthrough_args + isa_ps_args_build_flags + omp_passthrough_args
 diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py
-index f29c382f0e3..d865df1684b 100644
+index a63bff50d5e..7da14c2429c 100644
 --- a/torch/utils/cpp_extension.py
 +++ b/torch/utils/cpp_extension.py
-@@ -1567,31 +1567,37 @@ def include_paths(device_type: str = "cpu", torch_include_dirs=True) -> list[str
+@@ -1604,31 +1604,37 @@ def include_paths(device_type: str = "cpu", torch_include_dirs=True) -> list[str
      Returns:
          A list of include path strings.
      """
diff --git a/recipe/patches/0013-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch b/recipe/patches/0013-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
deleted file mode 100644
index a070adcd..00000000
--- a/recipe/patches/0013-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch
+++ /dev/null
@@ -1,131 +0,0 @@
-From 78e844bbba7ad96e1e84926b347988511fd5f5d0 Mon Sep 17 00:00:00 2001
-From: Eddie Yan <eddiey@nvidia.com>
-Date: Tue, 5 Aug 2025 00:49:50 +0000
-Subject: [PATCH 13/16] Attempt to fix torch.backends.cudnn.rnn import
-
-torch.backends.cudnn module in order to expose the .conv.fp32_precision
-and .rnn.fp32_precision settings. However, it fails to account for the
-existing torch.backends.cudnn.rnn module, which if imported after leaves
-us in a limbo state where the additional .rnn.fp32_precision property is
-no longer accessible.
-
-This PR is WIP and attempts to remedy this by propagating the hack and
-replaces the RNN module with a similar PropertyModule replacement. There
-is more than one wart, e.g., a duplicate ContextProp definition in
-rnn.py as the original one in backends seems to be too strict in its
-frozen flags check.
----
- test/test_cuda.py                |  2 ++
- torch/backends/cudnn/__init__.py |  3 ++-
- torch/backends/cudnn/rnn.py      | 40 +++++++++++++++++++++++++++++++-
- 3 files changed, 43 insertions(+), 2 deletions(-)
-
-diff --git a/test/test_cuda.py b/test/test_cuda.py
-index 0ebfe192f8d..2aafc98064b 100644
---- a/test/test_cuda.py
-+++ b/test/test_cuda.py
-@@ -853,6 +853,7 @@ print(t.is_pinned())
-             self.assertEqual(torch.backends.cudnn.rnn.fp32_precision, "none")
- 
-     @recover_orig_fp32_precision
-+    @serialTest()
-     def test_fp32_precision_with_float32_matmul_precision(self):
-         torch.set_float32_matmul_precision("highest")
-         self.assertEqual(torch.backends.cuda.matmul.fp32_precision, "ieee")
-@@ -862,6 +863,7 @@ print(t.is_pinned())
-         self.assertEqual(torch.backends.cuda.matmul.fp32_precision, "tf32")
- 
-     @recover_orig_fp32_precision
-+    @serialTest()
-     def test_invalid_status_for_legacy_api(self):
-         torch.backends.cudnn.conv.fp32_precision = "none"
-         torch.backends.cudnn.rnn.fp32_precision = "tf32"
-diff --git a/torch/backends/cudnn/__init__.py b/torch/backends/cudnn/__init__.py
-index 5cd6ec297c7..d5bb6926840 100644
---- a/torch/backends/cudnn/__init__.py
-+++ b/torch/backends/cudnn/__init__.py
-@@ -15,6 +15,8 @@ from torch.backends import (
-     PropModule,
- )
- 
-+from . import rnn
-+
- 
- try:
-     from torch._C import _cudnn
-@@ -229,7 +231,6 @@ class CudnnModule(PropModule):
-         torch._C._get_cudnn_allow_tf32, torch._C._set_cudnn_allow_tf32
-     )
-     conv = _FP32Precision("cuda", "conv")
--    rnn = _FP32Precision("cuda", "rnn")
-     fp32_precision = ContextProp(
-         _get_fp32_precision_getter("cuda", "all"),
-         _set_fp32_precision_setter("cuda", "all"),
-diff --git a/torch/backends/cudnn/rnn.py b/torch/backends/cudnn/rnn.py
-index 0dc9ca80aa6..9281234ae3e 100644
---- a/torch/backends/cudnn/rnn.py
-+++ b/torch/backends/cudnn/rnn.py
-@@ -1,5 +1,13 @@
- # mypy: allow-untyped-defs
-+import sys
-+
-+import torch._C
- import torch.cuda
-+from torch.backends import (
-+    _get_fp32_precision_getter,
-+    _set_fp32_precision_setter,
-+    PropModule,
-+)
- 
- 
- try:
-@@ -24,7 +32,7 @@ def get_cudnn_mode(mode):
-         # pyrefly: ignore [missing-attribute]
-         return int(_cudnn.RNNMode.gru)
-     else:
--        raise Exception(f"Unknown mode: {mode}")  # noqa: TRY002
-+        raise ValueError(f"Unknown mode: {mode}")  # noqa: TRY002
- 
- 
- # NB: We don't actually need this class anymore (in fact, we could serialize the
-@@ -46,6 +54,20 @@ class Unserializable:
-         self.inner = None
- 
- 
-+# we would like to use ContextProp from backends here but the
-+# frozen flags appears to be overzealous
-+class ContextProp:
-+    def __init__(self, getter, setter):
-+        self.getter = getter
-+        self.setter = setter
-+
-+    def __get__(self, obj, objtype):
-+        return self.getter()
-+
-+    def __set__(self, obj, val):
-+        self.setter(val)
-+
-+
- def init_dropout_state(dropout, train, dropout_seed, dropout_state):
-     dropout_desc_name = "desc_" + str(torch.cuda.current_device())
-     dropout_p = dropout if train else 0
-@@ -67,3 +89,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state):
-             )
-     dropout_ts = dropout_state[dropout_desc_name].get()
-     return dropout_ts
-+
-+
-+class CudnnRNNModule(PropModule):
-+    def __init__(self, m, name):
-+        super().__init__(m, name)
-+        self.m.Unserializable = Unserializable
-+        self.m.get_cudnn_mode = get_cudnn_mode
-+        self.m.init_dropout_state = init_dropout_state
-+
-+    fp32_precision = ContextProp(
-+        _get_fp32_precision_getter("cuda", "rnn"),
-+        _set_fp32_precision_setter("cuda", "rnn"),
-+    )
-+
-+
-+sys.modules[__name__] = CudnnRNNModule(sys.modules[__name__], __name__)
diff --git a/recipe/patches/0015-Fix-ICE-in-GCC-14-with-arm.patch b/recipe/patches/0015-Fix-ICE-in-GCC-14-with-arm.patch
deleted file mode 100644
index 162a68e4..00000000
--- a/recipe/patches/0015-Fix-ICE-in-GCC-14-with-arm.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 5b384c3e8723023fe20ac4afbf48914b7e092860 Mon Sep 17 00:00:00 2001
-From: Nikita Shulga <2453524+malfet@users.noreply.github.com>
-Date: Tue, 10 Feb 2026 04:35:39 +0000
-Subject: [PATCH 15/16] Fix ICE in GCC 14 with arm
-
-Updated preprocessor directive for GCC version check and removed BF16 condition. I.e. right now SVE256 compilation with gcc-14.2 on Debian13 for ` -march=armv8-a+sve+bf16`
-
-Without the fix, compilation fails with
-```
-In file included from /home/dev/git/pytorch/pytorch/build/aten/src/ATen/native/cpu/Unfold2d.cpp.SVE256.cpp:1:
-/home/dev/git/pytorch/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp: In function 'void at::native::{anonymous}::unfolded2d_acc_kernel(c10::ScalarType, void*, void*, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, bool)':
-/home/dev/git/pytorch/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp:225:1: error: unrecognizable insn:
-  225 | }
-      | ^
-(insn 1371 1370 1372 101 (set (reg:VNx16BI 3235)
-        (unspec:VNx16BI [
-                (reg:VNx16BI 3232)
-                (reg:VNx8BI 3234)
-                (const_vector:VNx4BI [
-                        (const_int 0 [0]) repeated x8
-                    ])
-            ] UNSPEC_TRN1_CONV)) "/home/dev/git/pytorch/pytorch/torch/headeronly/util/bit_cast.h":40:14 -1
-     (nil))
-during RTL pass: vregs
-/home/dev/git/pytorch/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp:225:1: internal compiler error: in extract_insn, at recog.cc:2812
-```
-
-Not sure what compelled me to put such a narrow restriction in https://github.com/pytorch/pytorch/pull/157867
-
-Fixes https://github.com/pytorch/pytorch/issues/172630
-
-Pull Request resolved: https://github.com/pytorch/pytorch/pull/174647
-Approved by: https://github.com/seemethere
----
- aten/src/ATen/native/cpu/Unfold2d.cpp | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/aten/src/ATen/native/cpu/Unfold2d.cpp b/aten/src/ATen/native/cpu/Unfold2d.cpp
-index ed69998e99f..9ae1391e260 100644
---- a/aten/src/ATen/native/cpu/Unfold2d.cpp
-+++ b/aten/src/ATen/native/cpu/Unfold2d.cpp
-@@ -169,8 +169,9 @@ void unfolded2d_acc_channels_last(
- 
- /* note: due to write issues, this one cannot be parallelized as well as
-  * unfolded2d_copy */
--#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_BF16)
--// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE without BF16
-+#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE)
-+// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE
-+// NS: With or without BF16, see https://github.com/pytorch/pytorch/issues/172630
- __attribute__((optimize("no-tree-vectorize")))
- #endif
- void unfolded2d_acc_kernel(
diff --git a/recipe/patches/0017-Ensure-test_tensorinv-uses-well-conditioned-inputs-1.patch b/recipe/patches/0017-Ensure-test_tensorinv-uses-well-conditioned-inputs-1.patch
new file mode 100644
index 00000000..6438a8cf
--- /dev/null
+++ b/recipe/patches/0017-Ensure-test_tensorinv-uses-well-conditioned-inputs-1.patch
@@ -0,0 +1,43 @@
+From dc1d381cbb45c75ae280c1493663fc38fa4b27f1 Mon Sep 17 00:00:00 2001
+From: Grayson Derossi <graysonderossi@gmail.com>
+Date: Thu, 19 Feb 2026 21:17:45 +0000
+Subject: [PATCH] Ensure test_tensorinv uses well-conditioned inputs (#175283)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+`test_tensorinv_cuda_float32` is failing on multiple GPU types because one of the test matrices is ill-conditioned and has potential for numerical error that's right on the bubble given the current tolerance. Changing the underlying algorithm from using a transpose to not using a transpose was enough to shift this test from passing to failing.
+
+This PR changes the setup of this test to remove the precision override and instead use `make_fullrank_matrices_with_distinct_singular_values` to ensure that inputs are well-conditioned, like is already done for `test_linalg_lu_family`.
+
+Fixes #175282
+
+Pull Request resolved: https://github.com/pytorch/pytorch/pull/175283
+Approved by: https://github.com/malfet
+
+Signed-off-by: Michał Górny <mgorny@quansight.com>
+---
+ test/test_linalg.py | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/test/test_linalg.py b/test/test_linalg.py
+index 0399bcd0ff0..dc93bb96c04 100644
+--- a/test/test_linalg.py
++++ b/test/test_linalg.py
+@@ -3751,11 +3751,14 @@ class TestLinalg(TestCase):
+     @skipCUDAIfNoMagma
+     @skipCPUIfNoLapack
+     @dtypes(*floating_and_complex_types())
+-    @precisionOverride({torch.float: 1e-3, torch.cfloat: 1e-3})
+     def test_tensorinv(self, device, dtype):
++        make_fullrank = make_fullrank_matrices_with_distinct_singular_values
+ 
+         def run_test(a_shape, ind):
+-            a = torch.randn(a_shape, dtype=dtype, device=device)
++            n = 1
++            for s in a_shape[:ind]:
++                n *= s
++            a = make_fullrank(n, n, dtype=dtype, device=device).reshape(a_shape)
+             a_numpy = a.cpu().numpy()
+             result = torch.linalg.tensorinv(a, ind=ind)
+             expected = np.linalg.tensorinv(a_numpy, ind=ind)
diff --git a/recipe/patches/0018-Declare-_tryToInferTypeImpl-with-TORCH_PYTHON_API.patch b/recipe/patches/0018-Declare-_tryToInferTypeImpl-with-TORCH_PYTHON_API.patch
new file mode 100644
index 00000000..679631b0
--- /dev/null
+++ b/recipe/patches/0018-Declare-_tryToInferTypeImpl-with-TORCH_PYTHON_API.patch
@@ -0,0 +1,26 @@
+From 1ddba0ff755b9a1291be5b1543a50aed154423cb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@quansight.com>
+Date: Fri, 24 Apr 2026 20:20:05 +0200
+Subject: [PATCH 17/17] Declare `_tryToInferTypeImpl` with `TORCH_PYTHON_API`
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Michał Górny <mgorny@quansight.com>
+---
+ torch/csrc/jit/python/pybind_utils.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/torch/csrc/jit/python/pybind_utils.h b/torch/csrc/jit/python/pybind_utils.h
+index 378bf4636fd..bad53cd9bb2 100644
+--- a/torch/csrc/jit/python/pybind_utils.h
++++ b/torch/csrc/jit/python/pybind_utils.h
+@@ -373,7 +373,7 @@ InferredType tryToInferContainerType(py::handle input, bool primitiveTypeOnly);
+ namespace detail {
+ 
+ // Additional implementations for tryToInferType().
+-std::optional<InferredType> _tryToInferTypeImpl(py::handle input);
++TORCH_PYTHON_API std::optional<InferredType> _tryToInferTypeImpl(py::handle input);
+ 
+ } // namespace detail
+