diff --git a/.ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse.yaml similarity index 100% rename from .ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse.yaml rename to .ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse.yaml diff --git a/.ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml similarity index 100% rename from .ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml rename to .ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse.yaml diff --git a/.ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse.yaml b/.ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse.yaml similarity index 100% rename from 
.ci_support/linux_aarch64_c_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse.yaml rename to .ci_support/linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse.yaml diff --git a/.ci_support/linux_aarch64_c_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse.yaml b/.ci_support/linux_aarch64_arm_variant_typetegrac_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse.yaml similarity index 100% rename from .ci_support/linux_aarch64_c_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse.yaml rename to .ci_support/linux_aarch64_arm_variant_typetegrac_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse.yaml diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index d2bd4382..23b5673c 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -70,7 +70,7 @@ jobs: DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 tools_install_dir: ~/miniforge3 build_workspace_dir: build_artifacts - - CONFIG: linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse + - CONFIG: linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version13is_rcFalse STORE_BUILD_ARTIFACTS: False UPLOAD_PACKAGES: True os: ubuntu @@ -78,7 +78,7 @@ jobs: DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 tools_install_dir: ~/miniforge3 build_workspace_dir: build_artifacts - - CONFIG: 
linux_aarch64_c_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse + - CONFIG: linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.17channel_targetsconda-forge_maincuda_compiler_versionNonecxx_compiler_version13is_rcFalse STORE_BUILD_ARTIFACTS: False UPLOAD_PACKAGES: True os: ubuntu @@ -86,7 +86,7 @@ jobs: DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 tools_install_dir: ~/miniforge3 build_workspace_dir: build_artifacts - - CONFIG: linux_aarch64_c_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse + - CONFIG: linux_aarch64_arm_variant_typesbsac_compiler_version13c_stdlib_version2.28channel_targetsconda-forge_maincuda_compiler_version13.0cxx_compiler_version13is_rcFalse STORE_BUILD_ARTIFACTS: False UPLOAD_PACKAGES: True os: ubuntu @@ -94,7 +94,7 @@ jobs: DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 tools_install_dir: ~/miniforge3 build_workspace_dir: build_artifacts - - CONFIG: linux_aarch64_c_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse + - CONFIG: linux_aarch64_arm_variant_typetegrac_compiler_version14c_stdlib_version2.34channel_targetsconda-forge_maincuda_compiler_version12.9cxx_compiler_version14is_rcFalse STORE_BUILD_ARTIFACTS: False UPLOAD_PACKAGES: True os: ubuntu diff --git a/recipe/build.sh b/recipe/build.sh index 00660af3..33d51ba4 100644 --- a/recipe/build.sh +++ b/recipe/build.sh @@ -16,6 +16,11 @@ export PACKAGE_TYPE=conda # remove pyproject.toml to avoid installing deps from pip rm -rf pyproject.toml +# remove runtime pin for setuptools, upstream added it to work around +# breakage from transitive dependencies using pkg_resources. We can handle +# these dependencies directly in conda-forge. 
+sed -i -e '/setuptools<82/d' setup.py + # uncomment to debug cmake build # export CMAKE_VERBOSE_MAKEFILE=1 diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 8aeca8a0..e135d5e9 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,6 +1,6 @@ -# if you wish to build release candidate number X, append the version string with ".rcX" -{% set version = "2.10.0" %} -{% set build = 4 %} +# if you wish to build release candidate number X, append "-rcX" to the version string +{% set version = "2.11.0" %} +{% set build = 0 %} # Use a higher build number for the CUDA variant, to ensure that it's # preferred by conda's solver, and it's preferentially @@ -25,11 +25,11 @@ package: source: {% if "rc" in version %} - url: https://download.pytorch.org/source_code/test/pytorch-v{{ version }}.tar.gz - sha256: f35b2d7839b284410e5be9ec2eeb7a3049e09c1b8f6a871d3f2cad495d93dcd6 + sha256: 3035931fff5b79e0300db69b0249e0c9c7ea5b394f451cb80e0280cbc2affcc2 {% else %} # The "pytorch-v" tarballs contain submodules; the "pytorch-" ones don't. 
- url: https://github.com/pytorch/pytorch/releases/download/v{{ version }}/pytorch-v{{ version }}.tar.gz - sha256: fa8ccbe87f83f48735505371c1c313b4aa6db400b0ae4f8a02844d1e150c695f + sha256: ab3fde9e7e382f45ac942be6ea2c2ef362c5ccd6f55ed6d5f35e6ea81d3ab88e {% endif %} patches: - patches/0001-Force-usage-of-python-3-and-error-without-numpy.patch @@ -50,12 +50,11 @@ source: # backport https://github.com/pytorch/pytorch/pull/166824 - patches/0011-Add-USE_SYSTEM-options-for-KLEIDI-CUDNN_FRONTEND-CUT.patch - patches/0012-Fix-building-kineto-against-system-fmt.patch - # backport https://github.com/pytorch/pytorch/pull/159828 - - patches/0013-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch - patches/0014-Use-Intel-LLVM-openmp.patch - # backport https://github.com/pytorch/pytorch/pull/174647 - - patches/0015-Fix-ICE-in-GCC-14-with-arm.patch - patches/0016-for-win-CUDA-remove-USE_CUDA-guard-for-skip-in-compi.patch # [cuda_compiler_version != "None"] + # backport https://github.com/pytorch/pytorch/pull/175283 + - patches/0017-Ensure-test_tensorinv-uses-well-conditioned-inputs-1.patch + - patches/0018-Declare-_tryToInferTypeImpl-with-TORCH_PYTHON_API.patch - patches_submodules/tensorpipe/0001-switch-away-from-find_package-CUDA.patch build: @@ -132,13 +131,14 @@ requirements: - libcusparse-dev - libmagma-devel - nccl # [linux] + - nccl <2.29 # [linux and cuda_compiler_version=="12.9" and arm_variant_type!="tegra"] - nvtx-c {% endif %} # other requirements - python 3.12 - numpy * - pip - - setuptools + - setuptools <82 - pyyaml - requests - six @@ -278,13 +278,14 @@ outputs: - libcusparse-dev - libmagma-devel - nccl # [linux] + - nccl <2.29 # [linux and cuda_compiler_version=="12.9" and arm_variant_type!="tegra"] - nvtx-c {% endif %} # other requirements - python - numpy - pip - - setuptools + - setuptools <82 - pyyaml - requests - six @@ -325,11 +326,9 @@ outputs: - pybind11 # https://github.com/pytorch/pytorch/pull/175115 - pybind11 <3.0.2 - - setuptools + - setuptools <82 
- sympy >=1.13.3 - typing_extensions >=4.10.0 - # see https://github.com/conda-forge/sympy-feedstock/issues/67 - - mpmath <1.4 run_constrained: # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/65 - pytorch-cpu {{ version }} # [cuda_compiler_version == "None"] @@ -517,6 +516,23 @@ outputs: {% set skips = skips ~ " or test_qengine" %} # [osx and arm64] # flaky failure on osx {% set skips = skips ~ " or test_LayerNorm_numeric_mps" %} # [osx and arm64] + # precision errors + {% set skips = skips ~ " or test_forward_nn_Linear" %} # [osx and arm64] + {% set skips = skips ~ " or test_forward_nn_TransformerEncoderLayer_train_mode_mps_float16" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_GRUCell_mps" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_GRU_eval_mode_mps" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_GRU_train_mode_mps" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_LSTMCell_mps" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_Linear_mps" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_MultiheadAttention_eval_mode_mps_float16" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_MultiheadAttention_train_mode_mps_float16" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_RNNCell_mps" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_RNN_eval_mode_mps" %} # [osx and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_RNN_train_mode_mps" %} # [osx and arm64] + {% set skips = skips ~ " or test_transformerencoderlayer_mps_float32" %} # [osx and arm64] + {% set skips = skips ~ " or test_transformerencoderlayer_gelu_mps_float32" %} # [osx and arm64] + {% set skips = skips ~ " or test_grad_nn_MultiheadAttention_eval_mode_cpu_float64" %} # [osx 
and arm64] + {% set skips = skips ~ " or test_non_contiguous_tensors_nn_CrossEntropyLoss_mps_float32" %} # [osx and arm64] # some warning-related failure, maybe it's broken by --disable-warnings? {% set skips = skips ~ " or test_cpp_warnings_have_python_context_cpu" %} {% set skips = skips ~ " or test_cpp_warnings_have_python_context_cuda" %} @@ -533,15 +549,15 @@ outputs: # disable hypothesis because it randomly yields health check errors # the opengpu server has a card with sm_70, an architecture dropped by CUDA 13.0 - {% if (cuda_compiler_version or "0").split(".")[0]|int < 13 %} - - pytest {{ jobs }} {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50 --timeout=1200 --disable-warnings # [not aarch64 or py==312] - {% endif %} + - set ONEDNN_VERBOSE=all # [win] + - export ONEDNN_VERBOSE=all # [unix] + - pytest -v {{ jobs }} {{ tests }} -k "not ({{ skips }})" -m "not hypothesis" --durations=50 --timeout=1200 --disable-warnings --force-short-summary # [not aarch64 or py==312] # regression test for https://github.com/conda-forge/pytorch-cpu-feedstock/issues/329, where we picked up # duplicate `.pyc` files due to newest py-ver (3.13) in the build environment not matching the one in host; # obviously this test can only be done for other python versions. - - test ! -f $SP_DIR/functorch/__pycache__/__init__.cpython-313.pyc # [py!=313 and unix] - - if exist %SP_DIR%\functorch\__pycache__\__init__.cpython-313.pyc exit 1 # [py!=313 and win] + - test ! 
-f $SP_DIR/functorch/__pycache__/__init__.cpython-314.pyc # [py!=314 and unix] + - if exist %SP_DIR%\functorch\__pycache__\__init__.cpython-314.pyc exit 1 # [py!=314 and win] # test integrity of CMake metadata and ensure that THPLayoutType is visible as a symbol from libtorch_python - cd cmake_test diff --git a/recipe/patches/0005-use-our-own-PREFIX-for-include-paths-etc.patch b/recipe/patches/0005-use-our-own-PREFIX-for-include-paths-etc.patch index 18ca639b..69e10477 100644 --- a/recipe/patches/0005-use-our-own-PREFIX-for-include-paths-etc.patch +++ b/recipe/patches/0005-use-our-own-PREFIX-for-include-paths-etc.patch @@ -1,7 +1,7 @@ -From 44659d96e67c15c4b626ae98046898f0de47d93b Mon Sep 17 00:00:00 2001 +From 97ef248e4150e8cace5f21c8f8fa9eb87f768ff0 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 23 Jan 2025 22:58:14 +1100 -Subject: [PATCH 05/16] use our own PREFIX for include paths etc. +Subject: [PATCH 05/13] use our own PREFIX for include paths etc. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -33,39 +33,40 @@ Co-Authored-By: Daniel Petry Co-Authored-By: Michał Górny Co-Authored-By: Tobias Fischer --- - cmake/TorchConfig.cmake.in | 7 ++++--- + cmake/TorchConfig.cmake.in | 13 ++++++++++--- torch/_inductor/cpp_builder.py | 4 +++- torch/utils/cpp_extension.py | 34 ++++++++++++++++++++-------------- - 3 files changed, 27 insertions(+), 18 deletions(-) + 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/cmake/TorchConfig.cmake.in b/cmake/TorchConfig.cmake.in -index 0b32ffa99ce..dcce7b38015 100644 +index abf5c814911..475dc3b88ce 100644 --- a/cmake/TorchConfig.cmake.in +++ b/cmake/TorchConfig.cmake.in -@@ -53,14 +53,15 @@ else() +@@ -53,9 +53,16 @@ else() endif() # Include directories. 
--if(EXISTS "${TORCH_INSTALL_PREFIX}/include") +-set(TORCH_INCLUDE_DIRS +- ${TORCH_INSTALL_PREFIX}/include +- ${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include) +if(EXISTS "${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include") + # top-level include directory - set(TORCH_INCLUDE_DIRS -- ${TORCH_INSTALL_PREFIX}/include - ${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include) - else() ++ set(TORCH_INCLUDE_DIRS ++ ${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include) ++else() + # site-packages include directory - set(TORCH_INCLUDE_DIRS - ${TORCH_INSTALL_PREFIX}/include -- ${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include) ++ set(TORCH_INCLUDE_DIRS ++ ${TORCH_INSTALL_PREFIX}/include + ${TORCH_INSTALL_PREFIX}/../../../../include/torch/csrc/api/include) - endif() ++endif() # Library dependencies. + if(@BUILD_SHARED_LIBS@) diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py -index 6a6b7d15ae3..0a4724e5c17 100644 +index 6dd6e0d2b5c..fbfa3175836 100644 --- a/torch/_inductor/cpp_builder.py +++ b/torch/_inductor/cpp_builder.py -@@ -1520,10 +1520,12 @@ def get_cpp_torch_options( +@@ -1522,10 +1522,12 @@ def get_cpp_torch_options( + python_include_dirs + torch_include_dirs + omp_include_dir_paths @@ -80,10 +81,10 @@ index 6a6b7d15ae3..0a4724e5c17 100644 passthrough_args = ( sys_libs_passthrough_args + isa_ps_args_build_flags + omp_passthrough_args diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py -index f29c382f0e3..d865df1684b 100644 +index a63bff50d5e..7da14c2429c 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py -@@ -1567,31 +1567,37 @@ def include_paths(device_type: str = "cpu", torch_include_dirs=True) -> list[str +@@ -1604,31 +1604,37 @@ def include_paths(device_type: str = "cpu", torch_include_dirs=True) -> list[str Returns: A list of include path strings. 
""" diff --git a/recipe/patches/0013-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch b/recipe/patches/0013-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch deleted file mode 100644 index a070adcd..00000000 --- a/recipe/patches/0013-Attempt-to-fix-torch.backends.cudnn.rnn-import.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 78e844bbba7ad96e1e84926b347988511fd5f5d0 Mon Sep 17 00:00:00 2001 -From: Eddie Yan -Date: Tue, 5 Aug 2025 00:49:50 +0000 -Subject: [PATCH 13/16] Attempt to fix torch.backends.cudnn.rnn import - -torch.backends.cudnn module in order to expose the .conv.fp32_precision -and .rnn.fp32_precision settings. However, it fails to account for the -existing torch.backends.cudnn.rnn module, which if imported after leaves -us in a limbo state where the additional .rnn.fp32_precision property is -no longer accessible. - -This PR is WIP and attempts to remedy this by propagating the hack and -replaces the RNN module with a similar PropertyModule replacement. There -is more than one wart, e.g., a duplicate ContextProp definition in -rnn.py as the original one in backends seems to be too strict in its -frozen flags check. 
---- - test/test_cuda.py | 2 ++ - torch/backends/cudnn/__init__.py | 3 ++- - torch/backends/cudnn/rnn.py | 40 +++++++++++++++++++++++++++++++- - 3 files changed, 43 insertions(+), 2 deletions(-) - -diff --git a/test/test_cuda.py b/test/test_cuda.py -index 0ebfe192f8d..2aafc98064b 100644 ---- a/test/test_cuda.py -+++ b/test/test_cuda.py -@@ -853,6 +853,7 @@ print(t.is_pinned()) - self.assertEqual(torch.backends.cudnn.rnn.fp32_precision, "none") - - @recover_orig_fp32_precision -+ @serialTest() - def test_fp32_precision_with_float32_matmul_precision(self): - torch.set_float32_matmul_precision("highest") - self.assertEqual(torch.backends.cuda.matmul.fp32_precision, "ieee") -@@ -862,6 +863,7 @@ print(t.is_pinned()) - self.assertEqual(torch.backends.cuda.matmul.fp32_precision, "tf32") - - @recover_orig_fp32_precision -+ @serialTest() - def test_invalid_status_for_legacy_api(self): - torch.backends.cudnn.conv.fp32_precision = "none" - torch.backends.cudnn.rnn.fp32_precision = "tf32" -diff --git a/torch/backends/cudnn/__init__.py b/torch/backends/cudnn/__init__.py -index 5cd6ec297c7..d5bb6926840 100644 ---- a/torch/backends/cudnn/__init__.py -+++ b/torch/backends/cudnn/__init__.py -@@ -15,6 +15,8 @@ from torch.backends import ( - PropModule, - ) - -+from . 
import rnn -+ - - try: - from torch._C import _cudnn -@@ -229,7 +231,6 @@ class CudnnModule(PropModule): - torch._C._get_cudnn_allow_tf32, torch._C._set_cudnn_allow_tf32 - ) - conv = _FP32Precision("cuda", "conv") -- rnn = _FP32Precision("cuda", "rnn") - fp32_precision = ContextProp( - _get_fp32_precision_getter("cuda", "all"), - _set_fp32_precision_setter("cuda", "all"), -diff --git a/torch/backends/cudnn/rnn.py b/torch/backends/cudnn/rnn.py -index 0dc9ca80aa6..9281234ae3e 100644 ---- a/torch/backends/cudnn/rnn.py -+++ b/torch/backends/cudnn/rnn.py -@@ -1,5 +1,13 @@ - # mypy: allow-untyped-defs -+import sys -+ -+import torch._C - import torch.cuda -+from torch.backends import ( -+ _get_fp32_precision_getter, -+ _set_fp32_precision_setter, -+ PropModule, -+) - - - try: -@@ -24,7 +32,7 @@ def get_cudnn_mode(mode): - # pyrefly: ignore [missing-attribute] - return int(_cudnn.RNNMode.gru) - else: -- raise Exception(f"Unknown mode: {mode}") # noqa: TRY002 -+ raise ValueError(f"Unknown mode: {mode}") # noqa: TRY002 - - - # NB: We don't actually need this class anymore (in fact, we could serialize the -@@ -46,6 +54,20 @@ class Unserializable: - self.inner = None - - -+# we would like to use ContextProp from backends here but the -+# frozen flags appears to be overzealous -+class ContextProp: -+ def __init__(self, getter, setter): -+ self.getter = getter -+ self.setter = setter -+ -+ def __get__(self, obj, objtype): -+ return self.getter() -+ -+ def __set__(self, obj, val): -+ self.setter(val) -+ -+ - def init_dropout_state(dropout, train, dropout_seed, dropout_state): - dropout_desc_name = "desc_" + str(torch.cuda.current_device()) - dropout_p = dropout if train else 0 -@@ -67,3 +89,19 @@ def init_dropout_state(dropout, train, dropout_seed, dropout_state): - ) - dropout_ts = dropout_state[dropout_desc_name].get() - return dropout_ts -+ -+ -+class CudnnRNNModule(PropModule): -+ def __init__(self, m, name): -+ super().__init__(m, name) -+ self.m.Unserializable = 
Unserializable -+ self.m.get_cudnn_mode = get_cudnn_mode -+ self.m.init_dropout_state = init_dropout_state -+ -+ fp32_precision = ContextProp( -+ _get_fp32_precision_getter("cuda", "rnn"), -+ _set_fp32_precision_setter("cuda", "rnn"), -+ ) -+ -+ -+sys.modules[__name__] = CudnnRNNModule(sys.modules[__name__], __name__) diff --git a/recipe/patches/0015-Fix-ICE-in-GCC-14-with-arm.patch b/recipe/patches/0015-Fix-ICE-in-GCC-14-with-arm.patch deleted file mode 100644 index 162a68e4..00000000 --- a/recipe/patches/0015-Fix-ICE-in-GCC-14-with-arm.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 5b384c3e8723023fe20ac4afbf48914b7e092860 Mon Sep 17 00:00:00 2001 -From: Nikita Shulga <2453524+malfet@users.noreply.github.com> -Date: Tue, 10 Feb 2026 04:35:39 +0000 -Subject: [PATCH 15/16] Fix ICE in GCC 14 with arm - -Updated preprocessor directive for GCC version check and removed BF16 condition. I.e. right now SVE256 compilation with gcc-14.2 on Debian13 for ` -march=armv8-a+sve+bf16` - -Without the fix, compilation fails with -``` -In file included from /home/dev/git/pytorch/pytorch/build/aten/src/ATen/native/cpu/Unfold2d.cpp.SVE256.cpp:1: -/home/dev/git/pytorch/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp: In function 'void at::native::{anonymous}::unfolded2d_acc_kernel(c10::ScalarType, void*, void*, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, bool)': -/home/dev/git/pytorch/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp:225:1: error: unrecognizable insn: - 225 | } - | ^ -(insn 1371 1370 1372 101 (set (reg:VNx16BI 3235) - (unspec:VNx16BI [ - (reg:VNx16BI 3232) - (reg:VNx8BI 3234) - (const_vector:VNx4BI [ - (const_int 0 [0]) repeated x8 - ]) - ] UNSPEC_TRN1_CONV)) "/home/dev/git/pytorch/pytorch/torch/headeronly/util/bit_cast.h":40:14 -1 - (nil)) -during RTL pass: vregs -/home/dev/git/pytorch/pytorch/aten/src/ATen/native/cpu/Unfold2d.cpp:225:1: internal compiler error: in extract_insn, at recog.cc:2812 -``` - -Not sure what 
compelled me to put such a narrow restriction in https://github.com/pytorch/pytorch/pull/157867 - -Fixes https://github.com/pytorch/pytorch/issues/172630 - -Pull Request resolved: https://github.com/pytorch/pytorch/pull/174647 -Approved by: https://github.com/seemethere ---- - aten/src/ATen/native/cpu/Unfold2d.cpp | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/aten/src/ATen/native/cpu/Unfold2d.cpp b/aten/src/ATen/native/cpu/Unfold2d.cpp -index ed69998e99f..9ae1391e260 100644 ---- a/aten/src/ATen/native/cpu/Unfold2d.cpp -+++ b/aten/src/ATen/native/cpu/Unfold2d.cpp -@@ -169,8 +169,9 @@ void unfolded2d_acc_channels_last( - - /* note: due to write issues, this one cannot be parallelized as well as - * unfolded2d_copy */ --#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_BF16) --// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE without BF16 -+#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE) -+// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE -+// NS: With or without BF16, see https://github.com/pytorch/pytorch/issues/172630 - __attribute__((optimize("no-tree-vectorize"))) - #endif - void unfolded2d_acc_kernel( diff --git a/recipe/patches/0017-Ensure-test_tensorinv-uses-well-conditioned-inputs-1.patch b/recipe/patches/0017-Ensure-test_tensorinv-uses-well-conditioned-inputs-1.patch new file mode 100644 index 00000000..6438a8cf --- /dev/null +++ b/recipe/patches/0017-Ensure-test_tensorinv-uses-well-conditioned-inputs-1.patch @@ -0,0 +1,43 @@ +From dc1d381cbb45c75ae280c1493663fc38fa4b27f1 Mon Sep 17 00:00:00 2001 +From: Grayson Derossi +Date: Thu, 19 Feb 2026 21:17:45 +0000 +Subject: [PATCH] Ensure test_tensorinv uses well-conditioned inputs (#175283) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +`test_tensorinv_cuda_float32` is failing on multiple GPU types because one of 
the test matrices is ill-conditioned and has potential for numerical error that's right on the bubble given the current tolerance. Changing the underlying algorithm from using a transpose to not using a transpose was enough to shift this test from passing to failing. + +This PR changes the setup of this test to remove the precision override and instead use `make_fullrank_matrices_with_distinct_singular_values` to ensure that inputs are well-conditioned, like is already done for `test_linalg_lu_family`. + +Fixes #175282 + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/175283 +Approved by: https://github.com/malfet + +Signed-off-by: Michał Górny +--- + test/test_linalg.py | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/test/test_linalg.py b/test/test_linalg.py +index 0399bcd0ff0..dc93bb96c04 100644 +--- a/test/test_linalg.py ++++ b/test/test_linalg.py +@@ -3751,11 +3751,14 @@ class TestLinalg(TestCase): + @skipCUDAIfNoMagma + @skipCPUIfNoLapack + @dtypes(*floating_and_complex_types()) +- @precisionOverride({torch.float: 1e-3, torch.cfloat: 1e-3}) + def test_tensorinv(self, device, dtype): ++ make_fullrank = make_fullrank_matrices_with_distinct_singular_values + + def run_test(a_shape, ind): +- a = torch.randn(a_shape, dtype=dtype, device=device) ++ n = 1 ++ for s in a_shape[:ind]: ++ n *= s ++ a = make_fullrank(n, n, dtype=dtype, device=device).reshape(a_shape) + a_numpy = a.cpu().numpy() + result = torch.linalg.tensorinv(a, ind=ind) + expected = np.linalg.tensorinv(a_numpy, ind=ind) diff --git a/recipe/patches/0018-Declare-_tryToInferTypeImpl-with-TORCH_PYTHON_API.patch b/recipe/patches/0018-Declare-_tryToInferTypeImpl-with-TORCH_PYTHON_API.patch new file mode 100644 index 00000000..679631b0 --- /dev/null +++ b/recipe/patches/0018-Declare-_tryToInferTypeImpl-with-TORCH_PYTHON_API.patch @@ -0,0 +1,26 @@ +From 1ddba0ff755b9a1291be5b1543a50aed154423cb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= 
+Date: Fri, 24 Apr 2026 20:20:05 +0200 +Subject: [PATCH 17/17] Declare `_tryToInferTypeImpl` with `TORCH_PYTHON_API` +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Michał Górny +--- + torch/csrc/jit/python/pybind_utils.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/torch/csrc/jit/python/pybind_utils.h b/torch/csrc/jit/python/pybind_utils.h +index 378bf4636fd..bad53cd9bb2 100644 +--- a/torch/csrc/jit/python/pybind_utils.h ++++ b/torch/csrc/jit/python/pybind_utils.h +@@ -373,7 +373,7 @@ InferredType tryToInferContainerType(py::handle input, bool primitiveTypeOnly); + namespace detail { + + // Additional implementations for tryToInferType(). +-std::optional _tryToInferTypeImpl(py::handle input); ++TORCH_PYTHON_API std::optional _tryToInferTypeImpl(py::handle input); + + } // namespace detail +