diff --git a/.github/actions/macos-ci-setup/action.yml b/.github/actions/macos-ci-setup/action.yml
index 0d60eeae8aee3..054676d301820 100644
--- a/.github/actions/macos-ci-setup/action.yml
+++ b/.github/actions/macos-ci-setup/action.yml
@@ -8,7 +8,7 @@ inputs:
   python_version:
     required: false
     type: string
-    default: "3.11"
+    default: "3.14"
   node_version:
     required: false
     type: string
diff --git a/.github/workflows/linux_ci.yml b/.github/workflows/linux_ci.yml
index f4d110e9bc2df..8801042492ecb 100644
--- a/.github/workflows/linux_ci.yml
+++ b/.github/workflows/linux_ci.yml
@@ -68,6 +68,21 @@ jobs:
     secrets:
       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
+  build-linux-x64-release-py314:
+    name: Build Linux x64 Release (Python 3.14)
+    uses: ./.github/workflows/reusable_linux_build.yml
+    with:
+      pool_name: "onnxruntime-github-Ubuntu2204-AMD-CPU"
+      build_config: Release
+      architecture: x64
+      dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu
+      docker_image_repo: onnxruntimecpubuildpythonx64
+      extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --build_nuget --enable_transformers_tool_test --cmake_extra_defines onnxruntime_BUILD_BENCHMARKS=ON'
+      python_path_prefix: 'PATH=/opt/python/cp314-cp314/bin:$PATH' # $ needs escaping in single quotes
+      job_identifier: build-linux-x64-release-py314
+    secrets:
+      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
   orttraining-linux-ci-pipeline:
     name: Build Linux x64 Release with training
     uses: ./.github/workflows/reusable_linux_build.yml
@@ -109,7 +124,7 @@ jobs:
       dockerfile_path: tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile
       docker_image_repo: onnxruntimecpubuildpythonaarch64
       extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cmake_extra_defines onnxruntime_BUILD_BENCHMARKS=ON'
-      python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH' # $ needs escaping in single quotes
+      python_path_prefix: 'PATH=/opt/python/cp314-cp314/bin:$PATH' # $ needs escaping in single quotes
       job_identifier: build-linux-arm64-release
     secrets:
       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 32e126c34ac39..0f8b4a42f48ae 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -16,7 +16,7 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  python_version: 3.11
+  python_version: "3.14"
 
 jobs:
   cpu:
@@ -28,6 +28,7 @@ jobs:
           {"machine": "arm64", "target": "arm64", "build_config": "Debug"},
           {"machine": "arm64", "target": "arm64", "build_config": "Release"}
         ]
+      python_version: "3.14"
 
   coreml:
     uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
@@ -39,6 +40,7 @@ jobs:
           {"machine": "arm64", "target": "arm64", "build_config": "Debug"},
           {"machine": "arm64", "target": "arm64", "build_config": "Release"}
         ]
+      python_version: "3.14"
 
   xnnpack:
     uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
@@ -49,6 +51,7 @@ jobs:
         [
           {"machine": "arm64", "target": "arm64", "build_config": "Debug"}
         ]
+      python_version: "3.14"
 
   webgpu:
     uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
@@ -60,6 +63,7 @@ jobs:
           {"machine": "arm64", "target": "arm64", "build_config": "Debug"},
           {"machine": "arm64", "target": "arm64", "build_config": "Release"}
         ]
+      python_version: "3.14"
 
   iphone_simulator:
     runs-on: macos-15
diff --git a/.github/workflows/macos-ci-build-and-test-workflow.yml b/.github/workflows/macos-ci-build-and-test-workflow.yml
index 75002fdf12c00..76198c7f5c1ce 100644
--- a/.github/workflows/macos-ci-build-and-test-workflow.yml
+++ b/.github/workflows/macos-ci-build-and-test-workflow.yml
@@ -19,7 +19,7 @@ on:
       python_version:
         required: false
         type: string
-        default: "3.11"
+        default: "3.14"
       matrix_include:
         required: false
         type: string
diff --git a/.github/workflows/windows_cuda.yml b/.github/workflows/windows_cuda.yml
index 0eb7f51cec986..852d0164083c4 100644
--- a/.github/workflows/windows_cuda.yml
+++ b/.github/workflows/windows_cuda.yml
@@ -32,7 +32,7 @@ jobs:
 
       - uses: actions/setup-python@v6
         with:
-          python-version: '3.12'
+          python-version: '3.14'
           architecture: x64
 
       - name: Locate vcvarsall and Setup Env
@@ -173,7 +173,7 @@ jobs:
 
       - uses: actions/setup-python@v6
         with:
-          python-version: '3.12'
+          python-version: '3.14'
           architecture: x64
 
       - uses: actions/setup-node@v6
diff --git a/cmake/deps.txt b/cmake/deps.txt
index 578dd8fd23d09..65c74060e8deb 100644
--- a/cmake/deps.txt
+++ b/cmake/deps.txt
@@ -46,7 +46,7 @@ protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/downlo
 protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef
 psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
 pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2
-pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.6.zip;f780292da9db273c8ef06ccf5fd4b623624143e9
+pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v3.0.2.zip;a064e663b4d7a337ac291d1bef7337ef4e60a1ae
 pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/403d652dca4c1046e8145950b1c0997a9f748b57.zip;30b2a07fe4bae8574f89176e56274cacdd6d135b
 re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
 safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake
index 79280c97a899e..ba14667bc3c88 100644
--- a/cmake/external/pybind11.cmake
+++ b/cmake/external/pybind11.cmake
@@ -6,7 +6,6 @@ onnxruntime_fetchcontent_declare(
     URL ${DEP_URL_pybind11}
     URL_HASH SHA1=${DEP_SHA1_pybind11}
     EXCLUDE_FROM_ALL
-    FIND_PACKAGE_ARGS 2.13 NAMES pybind11
+    FIND_PACKAGE_ARGS 3.0 NAMES pybind11
 )
 onnxruntime_fetchcontent_makeavailable(pybind11_project)
-
diff --git a/cmake/vcpkg-ports/pybind11/portfile.cmake b/cmake/vcpkg-ports/pybind11/portfile.cmake
index 2c63582d1ee15..4e4cd30a26df1 100644
--- a/cmake/vcpkg-ports/pybind11/portfile.cmake
+++ b/cmake/vcpkg-ports/pybind11/portfile.cmake
@@ -2,7 +2,8 @@ vcpkg_from_github(
     OUT_SOURCE_PATH SOURCE_PATH
     REPO pybind/pybind11
     REF "v${VERSION}"
-    SHA512 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a
+    # SHA512 for the zip (not tar.gz) file.
+    SHA512 786b1bf534ac67a8d5669f8babf67bb13e48b3a3da1b6344e43ae10a84b80bbc8fea5f12a65fd18739c341fefef5622c5dc096db964dff33cc62ea4259b2e2c1
     HEAD_REF master
 )
 
diff --git a/cmake/vcpkg-ports/pybind11/vcpkg.json b/cmake/vcpkg-ports/pybind11/vcpkg.json
index a730d32017885..058e2235fea08 100644
--- a/cmake/vcpkg-ports/pybind11/vcpkg.json
+++ b/cmake/vcpkg-ports/pybind11/vcpkg.json
@@ -1,6 +1,6 @@
 {
   "name": "pybind11",
-  "version": "2.13.6",
+  "version": "3.0.2",
   "description": "pybind11 is a lightweight header-only library that exposes C++ types in Python and vice versa, mainly to create Python bindings of existing C++ code",
   "homepage": "https://github.com/pybind/pybind11",
   "license": "BSD-3-Clause",
diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py
index 1c5e31af99d82..0bd75e5c92e4c 100644
--- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py
+++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py
@@ -6,6 +6,7 @@
 PyTorch-ONNX exporter (torch.onnx.export).
 """
 
+import contextlib
 import typing
 
 try:
@@ -22,7 +23,7 @@
 _registered_ops: typing.AbstractSet[str] = set()
 
 
-def _reg(symbolic_fn: typing.Callable, namespace: str = ""):
+def _reg(symbolic_fn: typing.Callable, namespace: str = "aten"):
     name = f"{namespace}::{symbolic_fn.__name__}"
     torch.onnx.register_custom_op_symbolic(name, symbolic_fn, _OPSET_VERSION)
     _registered_ops.add(name)
@@ -49,13 +50,6 @@ def grid_sampler(g, input, grid, mode, padding_mode, align_corners):
         padding_mode_str = ["zeros", "border", "reflection"][padding_mode]
         align_corners = int(symbolic_helper._maybe_get_const(align_corners, "b"))
 
-        # From opset v13 onward, the output shape can be specified with
-        # (N, C, H, W) (N, H_out, W_out, 2) => (N, C, H_out, W_out)
-        # input_shape = input.type().sizes()
-        # gird_shape = grid.type().sizes()
-        # output_shape = input_shape[:2] + gird_shape[1:3]
-        # g.op(...).setType(input.type().with_sizes(output_shape))
-
         return g.op(
             "com.microsoft::GridSample",
             input,
@@ -71,15 +65,24 @@ def inverse(g, self):
         return g.op("com.microsoft::Inverse", self).setType(self.type())
 
     _reg(inverse)
+    torch.onnx.register_custom_op_symbolic("aten::linalg_inv", inverse, _OPSET_VERSION)
+    _registered_ops.add("aten::linalg_inv")
+
+    def gelu(g, self: torch._C.Value, approximate="none"):
+        # PyTorch can emit aten::gelu with or without the optional approximate arg.
+        if not isinstance(approximate, str):
+            approximate = symbolic_helper._maybe_get_const(approximate, "s")
 
-    @torch.onnx.symbolic_helper.parse_args("v", "s")
-    def gelu(g, self: torch._C.Value, approximate: str = "none"):
-        # Use microsoft::Gelu for performance if possible. It only supports approximate == "none"
+        # Use microsoft::Gelu for performance if possible. It only supports approximate == "none".
         if approximate == "none":
             return g.op("com.microsoft::Gelu", self).setType(self.type())
         return torch.onnx.symbolic_opset9.gelu(g, self, approximate)
 
     _reg(gelu)
+    # Some PyTorch versions dispatch GELU symbolic lookup by exporter opset.
+    # Registering across stable opsets keeps ORT Gelu fusion consistently enabled.
+    for opset in range(9, 21):
+        torch.onnx.register_custom_op_symbolic("aten::gelu", gelu, opset)
 
     def triu(g, self, diagonal):
         return g.op("com.microsoft::Trilu", self, diagonal, upper_i=1).setType(self.type())
@@ -127,3 +130,8 @@ def unregister():
             for version in symbolic_helper._onnx_stable_opsets:
                 if version >= _OPSET_VERSION and symbolic_registry.is_registered_op(kind, namespace, version):
                     del symbolic_registry._registry[(namespace, version)][kind]
+
+    # Also clean up gelu's multi-opset registrations (see register()).
+    for opset in range(9, 21):
+        with contextlib.suppress(Exception):
+            torch.onnx.unregister_custom_op_symbolic("aten::gelu", opset)
diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py
index 08f8691d8b2b5..de7f0a044c118 100644
--- a/onnxruntime/python/tools/transformers/fusion_attention.py
+++ b/onnxruntime/python/tools/transformers/fusion_attention.py
@@ -1112,11 +1112,11 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node):
             if (
                 (mul_val is None)
                 or not (isinstance(mul_val, np.ndarray) and mul_val.size == 1)
-                or (float(mul_val) >= 0)
+                or (mul_val.item() >= 0)
             ):
                 return
-            if float(mul_val) != -10000:
-                self.mask_filter_value = float(mul_val)
+            if mul_val.item() != -10000:
+                self.mask_filter_value = mul_val.item()
 
         if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_k.input[0] == root_input:
             mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) if not is_no_mask_attention else None
diff --git a/onnxruntime/python/tools/transformers/large_model_exporter.py b/onnxruntime/python/tools/transformers/large_model_exporter.py
index 29829a6c475d9..f4d9e28d4ecb2 100644
--- a/onnxruntime/python/tools/transformers/large_model_exporter.py
+++ b/onnxruntime/python/tools/transformers/large_model_exporter.py
@@ -290,6 +290,7 @@ def do_export_internal(model: nn.Module, onnx_io_tuple: tuple, onnx_inputs: tupl
             input_names=onnx_inp_names,
             output_names=onnx_out_names,
             dynamic_axes=onnx_dynamic_axes,
+            dynamo=False,
         )
 
         onnx_path.unlink(missing_ok=True)
diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py
index b405c19b04689..0b86d5f038cd8 100644
--- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py
+++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py
@@ -473,7 +473,7 @@ def export_onnx(
                     input_names=input_names,
                     output_names=output_names,
                     dynamic_axes=dynamic_axes,
-                    opset_version=11,
+                    opset_version=14,
                     do_constant_folding=True,
                     use_external_data_format=True,
                     verbose=verbose,
diff --git a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py
index 2cb6a733c5bc7..17a4ef58914d6 100644
--- a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py
+++ b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py
@@ -235,6 +235,7 @@ def run_torchscript_separate_export(
         opset_version=torch_export_onnx_opset_version,
         do_constant_folding=True,
         verbose=args.verbose,
+        dynamo=False,
     )
 
     # Check decoder_model.onnx and save all external data to one file
@@ -294,6 +295,7 @@ def run_torchscript_separate_export(
         opset_version=torch_export_onnx_opset_version,
         do_constant_folding=True,
         verbose=args.verbose,
+        dynamo=False,
     )
 
     # Check decoder_with_past_model.onnx and save all external data to one file
diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py
index e10e616d35d38..31fb60f86faf1 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py
@@ -391,8 +391,9 @@ def export_onnx(
                 input_names=input_names,
                 output_names=output_names,
                 dynamic_axes=dynamic_axes,
-                opset_version=17,
+                opset_version=18,
                 do_constant_folding=True,
+                dynamo=False,
                 verbose=verbose,
             )
 
diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py
index 851f641442016..48d4e12a38a43 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py
@@ -110,8 +110,9 @@ def export_onnx(
                 input_names=input_names,
                 output_names=output_names,
                 dynamic_axes=dynamic_axes,
-                opset_version=17,
+                opset_version=18,
                 do_constant_folding=True,
+                dynamo=False,
                 verbose=verbose,
             )
 
diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py
index cd81edc1001be..35ec59b2bca69 100644
--- a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py
+++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py
@@ -293,8 +293,9 @@ def export_onnx(
                 input_names=input_names,
                 output_names=output_names,
                 dynamic_axes=dynamic_axes,
-                opset_version=17,
+                opset_version=18,
                 do_constant_folding=True,
+                dynamo=False,
                 verbose=verbose,
             )
 
diff --git a/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py b/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py
index 66f24c47f6cdb..a8c2ad1967acb 100644
--- a/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py
+++ b/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py
@@ -49,6 +49,7 @@ def torch_onnx_export(
             keep_initializers_as_inputs=keep_initializers_as_inputs,
             custom_opsets=custom_opsets,
             export_modules_as_functions=export_modules_as_functions,
+            dynamo=False,
         )
     else:
         torch.onnx.export(
diff --git a/onnxruntime/test/python/test_pytorch_export_contrib_ops.py b/onnxruntime/test/python/test_pytorch_export_contrib_ops.py
index e7ea83dd00297..afefc4e616a87 100644
--- a/onnxruntime/test/python/test_pytorch_export_contrib_ops.py
+++ b/onnxruntime/test/python/test_pytorch_export_contrib_ops.py
@@ -59,6 +59,9 @@ def setUp(self):
         torch.manual_seed(0)
         pytorch_export_contrib_ops.register()
 
+    def tearDown(self):
+        pytorch_export_contrib_ops.unregister()
+
     def run_test(
         self,
         model,
@@ -101,6 +104,7 @@ def run_test(
                 input_names=input_names,
                 output_names=output_names,
                 custom_opsets=custom_opsets,
+                dynamo=False,
             )
 
             # compute onnxruntime output prediction
@@ -143,12 +147,13 @@ def test_gelu_is_fused_by_default(self):
             f,
             opset_version=self.opset_version,
             custom_opsets={"com.microsoft": 1},
+            dynamo=False,
         )
         f.seek(0)
         onnx_model = onnx.load(f)
-        node = onnx_model.graph.node[0]
-        self.assertEqual(node.op_type, "Gelu")
-        self.assertEqual(node.domain, "com.microsoft")
+        # Default GELU should be mapped to ORT contrib Gelu for performance.
+        gelu_nodes = [n for n in onnx_model.graph.node if n.op_type == "Gelu" and n.domain == "com.microsoft"]
+        self.assertEqual(len(gelu_nodes), 1)
 
     @parameterized.parameterized.expand([("default_approximate", "none"), ("tanh_approximate", "tanh")])
     @unittest.skipIf(_torch_version_lower_than("1.12"), "Gelu's approximate parameter unsupported in PyTorch < 1.12")
@@ -230,8 +235,8 @@ def forward(self, input):
 # IR version 4 style export.
 ONNXExporterTest_opset9_IRv4 = type(
     "TestONNXRuntime_opset9_IRv4",
-    (unittest.TestCase,),
-    dict(ONNXExporterTest.__dict__, keep_initializers_as_inputs=False),
+    (ONNXExporterTest,),
+    dict(keep_initializers_as_inputs=False),
 )
 
 
diff --git a/onnxruntime/test/python/transformers/parity_utilities.py b/onnxruntime/test/python/transformers/parity_utilities.py
index fa16f0e67a523..04a1ed06773e7 100644
--- a/onnxruntime/test/python/transformers/parity_utilities.py
+++ b/onnxruntime/test/python/transformers/parity_utilities.py
@@ -92,6 +92,7 @@ def export_onnx(model, onnx_model_path, float16, hidden_size, device):
         dynamic_axes=dynamic_axes,
         opset_version=11,
         do_constant_folding=True,
+        dynamo=False,
     )
     print("exported:", onnx_model_path)
 
diff --git a/onnxruntime/test/python/transformers/test_gelu_fusions.py b/onnxruntime/test/python/transformers/test_gelu_fusions.py
index 11ae1401ff8ed..a63e2653f2fbc 100644
--- a/onnxruntime/test/python/transformers/test_gelu_fusions.py
+++ b/onnxruntime/test/python/transformers/test_gelu_fusions.py
@@ -75,17 +75,22 @@ def test_fusions(self, test_case, dynamo):
         dummy_input = torch.ones(3, dtype=torch.float32)
         test_name = f"{operator}_{source}"
         onnx_path = f"{test_name}.onnx"
+
+        # For Torch 2.10+, torch.nn.functional.gelu(approximate="tanh") exports as Gelu node.
+        # So we force opset_version=18 here.
         torch.onnx.export(
             model,
             (dummy_input,),
             onnx_path,
             input_names=["input"],
             output_names=["output"],
-            dynamo=dynamo,
+            opset_version=18,
+            dynamo=False,
             optimize=True,  # Only meaningful when dynamo is True
         )
         optimizer = optimize_model(onnx_path, "bert")
         # optimizer.save_model_to_file(f"{operator}_{source}_opt.onnx")
+
         os.remove(onnx_path)
         # Remove the associated .data file (dynamo)
         data_path = onnx_path + ".data"
diff --git a/onnxruntime/test/python/transformers/test_gqa.py b/onnxruntime/test/python/transformers/test_gqa.py
index 6def1be804743..5ff0572c927c6 100644
--- a/onnxruntime/test/python/transformers/test_gqa.py
+++ b/onnxruntime/test/python/transformers/test_gqa.py
@@ -1967,7 +1967,7 @@ def has_flash_attention(bf16=False):
 
 
 def has_quantized_kv_cache():
-    return version.parse(ort_version) >= version.parse("1.24.0")
+    return version.parse(ort_version) >= version.parse("1.25.0")
 
 
 @unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.")
@@ -2069,6 +2069,7 @@ def test_gqa_past_flash_attention_bf16(self, name, config):
 
 
 @unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.")
+@unittest.skipIf(not has_quantized_kv_cache(), "Quantized KV Cache is not available, skipping tests.")
 class TestFlashGQABF16QuantizedKV(unittest.TestCase):
     def manual_seed(self):
         # Reset random seeds before each test to ensure test isolation
@@ -2301,6 +2302,7 @@ def gqa_xqa_test_cases():
 
 
 @unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.")
+@unittest.skipIf(not has_quantized_kv_cache(), "Quantized KV Cache is not available, skipping tests.")
 class TestXQAQuantizedParity(unittest.TestCase):
     """Tests that verify fused kernels produce the same results as unfused kernels."""
 
@@ -2330,6 +2332,7 @@ def test_xqa_quantized_parity(self, name, config, torch_type, ort_type):
 
 
 @unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.")
+@unittest.skipIf(not has_quantized_kv_cache(), "Quantized KV Cache is not available, skipping tests.")
 class TestGQARegressions(unittest.TestCase):
     """Specific regression tests for historical bugs."""
 
diff --git a/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py b/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py
index 444d86da75ba6..c07eb39e6df75 100644
--- a/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py
+++ b/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py
@@ -253,6 +253,7 @@ def export_onnx(model, onnx_model_path, float16, hidden_size, num_attention_head
         dynamic_axes=dynamic_axes,
         opset_version=11,
         do_constant_folding=True,
+        dynamo=False,
     )
     print("exported:", onnx_model_path)
 
diff --git a/onnxruntime/test/python/transformers/test_phi_vision.py b/onnxruntime/test/python/transformers/test_phi_vision.py
index d276366706af9..5a5fa926eb255 100644
--- a/onnxruntime/test/python/transformers/test_phi_vision.py
+++ b/onnxruntime/test/python/transformers/test_phi_vision.py
@@ -208,6 +208,7 @@ def export(self, model, inputs):
                     "input": {0: "batch", 1: "seq"},
                     "attention_mask": {0: "batch", 2: "seq", 3: "seq"},
                 },
+                dynamo=False,
             )
         else:
             torch.onnx.export(
@@ -217,6 +218,7 @@ def export(self, model, inputs):
                 export_params=True,
                 opset_version=14,
                 do_constant_folding=True,
+                dynamo=False,
             )
 
     def tearDown(self):
diff --git a/onnxruntime/test/python/transformers/test_whisper.py b/onnxruntime/test/python/transformers/test_whisper.py
index e3ca8e6b6ac9c..e90a14f8d7d61 100644
--- a/onnxruntime/test/python/transformers/test_whisper.py
+++ b/onnxruntime/test/python/transformers/test_whisper.py
@@ -471,8 +471,9 @@ def export(self, model, inputs, input_names, output_names, dynamic_axes):
             input_names=input_names,
             output_names=output_names,
             dynamic_axes=dynamic_axes,
-            opset_version=17,
+            opset_version=18,
             do_constant_folding=True,
+            dynamo=False,
             verbose=False,
         )
 
@@ -530,9 +531,7 @@ def test_hf_whisper_encoder_self_attention(self, precision, ep):
             use_gpu=True,
             only_onnxruntime=False,
         )
-        name = f"hf_{precision}_encoder_self_attention.onnx"
-        # optimized_model.save_model_to_file(name)  # Uncomment for debugging purposes
-        self.verify_fusion(optimized_model, name)
+        self.verify_fusion(optimized_model, f"hf_{precision}_encoder_self_attention.onnx")
 
     @parameterized.expand(
         [
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 146b3ae45de19..038ef04db2256 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -1756,7 +1756,7 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
                 # Install cpu only version of torch when cuda is not enabled in Linux.
                 extra = [] if args.use_cuda and is_linux() else ["--index-url", "https://download.pytorch.org/whl/cpu"]
                 run_subprocess(
-                    [sys.executable, "-m", "pip", "install", "torch==2.8.0", "torchvision==0.23.0", *extra],
+                    [sys.executable, "-m", "pip", "install", "torch==2.10.0", "torchvision==0.25.0", *extra],
                     cwd=cwd,
                     dll_path=dll_path,
                     python_path=python_path,
@@ -1833,11 +1833,9 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
                         [sys.executable, "-m", "unittest", "discover", "-s", "quantization"], cwd=cwd, dll_path=dll_path
                     )
 
-                    # onnx package does not support python 3.14 yet so skip the transformers tests for python 3.14.
-                    # we can remove this check when onnx package supports python 3.14.
                     if args.enable_transformers_tool_test and (sys.version_info.major, sys.version_info.minor) < (
                         3,
-                        14,
+                        15,
                     ):
                         import google.protobuf  # noqa: PLC0415
                         import numpy  # noqa: PLC0415
diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml
index b634cbf1c287d..381861d2b327a 100644
--- a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml
+++ b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml
@@ -183,8 +183,6 @@ stages:
 
   - stage: Win_py_${{ parameters.EP_NAME }}_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Tests
     dependsOn: Win_py_${{ parameters.EP_NAME }}_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Build
-    # Skip this stage for Python 3.14 for now until onnx package support python 3.14.
-    condition: and(succeeded(), ne('${{ parameters.PYTHON_VERSION }}', '3.14'))
     jobs:
     - job: Win_py_${{ parameters.EP_NAME }}_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Tests
       workspace:
diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-webgpu-stage.yml
index d7437a66701f5..0d3357319dd53 100644
--- a/tools/ci_build/github/azure-pipelines/stages/py-win-webgpu-stage.yml
+++ b/tools/ci_build/github/azure-pipelines/stages/py-win-webgpu-stage.yml
@@ -138,8 +138,6 @@ stages:
 
   - stage: Win_py_webgpu_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Tests
     dependsOn: Win_py_webgpu_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Build
-    # Skip this stage for Python 3.14 for now until onnx package support python 3.14.
-    condition: and(succeeded(), ne('${{ parameters.PYTHON_VERSION }}', '3.14'))
     jobs:
     - job: Win_py_webgpu_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Tests
       workspace:
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-cpu.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-cpu.yml
index 09603f2350657..326cfd7829f2f 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-win-cpu.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-win-cpu.yml
@@ -149,20 +149,18 @@ jobs:
 
 
   - powershell: |
-      if ("$(PythonVersion)" -notcontains "3.14") {
-        python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq
-        Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate}
-        Remove-Item -Recurse -Force onnxruntime
-        if ("$(ExtraParam)".Split() -contains "--use_azure") {
-
-          if( "${{parameters.architecture}}" -eq 'arm64') {
-            $env:path="$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\arm64-windows\bin;$env:path"
-          }  else {
-            $env:path="$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\x64-windows\bin;$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\x86-windows\bin;$env:path"
-          }
-          python onnxruntime_test_python_azure.py
+      python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq
+      Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate}
+      Remove-Item -Recurse -Force onnxruntime
+      if ("$(ExtraParam)".Split() -contains "--use_azure") {
+
+        if( "${{parameters.architecture}}" -eq 'arm64') {
+          $env:path="$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\arm64-windows\bin;$env:path"
+        }  else {
+          $env:path="$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\x64-windows\bin;$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\x86-windows\bin;$env:path"
         }
-        python onnx_backend_test_series.py
+        python onnxruntime_test_python_azure.py
       }
+      python onnx_backend_test_series.py
     workingDirectory: '$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\${{ parameters.cmake_build_type }}'
     displayName: 'Run Python Tests'
diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt
index 42bee7a892b11..7e2b6e74cfdde 100644
--- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt
+++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt
@@ -1,5 +1,5 @@
-numpy==2.2.6; python_version < "3.14"
-numpy==2.3.2; python_version >= "3.14"
+numpy==2.2.6; python_version < "3.11"
+numpy==2.4.2; python_version >= "3.11"
 mypy
 pytest
 setuptools>=68.2.2
@@ -7,4 +7,4 @@ wheel
 protobuf==4.25.8
 sympy==1.14
 flatbuffers
-onnx==1.20.1; python_version < "3.14"
+onnx==1.20.1
diff --git a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt
index c5fc16837e093..63a8e96d8c128 100644
--- a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt
@@ -3,13 +3,13 @@ beartype==0.15.0
 flatbuffers
 cerberus
 h5py
-onnx==1.20.1; python_version < "3.14"
+onnx==1.20.1
 # Python dependencies required for pytorch development
 astunparse
 expecttest!=0.2.0
 hypothesis
-numpy==2.2.6; python_version < "3.14"
-numpy==2.3.2; python_version >= "3.14"
+numpy==2.2.6; python_version < "3.11"
+numpy==2.4.2; python_version >= "3.11"
 psutil
 pyyaml
 requests
diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt
index 8f5d0776501c0..ffcad5ee67208 100644
--- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt
@@ -1,5 +1,5 @@
-numpy==2.2.6; python_version < "3.14"
-numpy==2.3.2; python_version >= "3.14"
+numpy==2.2.6; python_version < "3.11"
+numpy==2.4.2; python_version >= "3.11"
 mypy
 pytest
 setuptools>=68.2.2
@@ -8,6 +8,5 @@ protobuf==6.33.0
 sympy==1.14
 flatbuffers
 neural-compressor>=2.2.1
-triton==3.2.0; python_version < "3.14"
-triton==3.5.0; python_version >= "3.14"
-onnx==1.20.1; python_version < "3.14"
+triton==3.5.0
+onnx==1.20.1
diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt
index 85a9c6391af80..ad57cc715589b 100644
--- a/tools/ci_build/github/linux/docker/scripts/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt
@@ -1,6 +1,6 @@
 cerberus
-numpy==2.2.6; python_version < "3.14"
-numpy==2.3.2; python_version >= "3.14"
+numpy==2.2.6; python_version < "3.11"
+numpy==2.4.2; python_version >= "3.11"
 mypy
 pytest
 setuptools==78.1.1
@@ -10,6 +10,6 @@ sympy==1.14
 flatbuffers
 protobuf==6.33.0
 packaging
-onnxscript==0.5.3; python_version < "3.14"
-onnx-ir==0.1.10; python_version < "3.14"
-onnx==1.20.1; python_version < "3.14"
+onnxscript==0.6.2
+onnx-ir==0.1.16
+onnx==1.20.1
diff --git a/tools/ci_build/github/linux/python/requirements.txt b/tools/ci_build/github/linux/python/requirements.txt
index 77b6fd988fb19..994776e8fb6fd 100644
--- a/tools/ci_build/github/linux/python/requirements.txt
+++ b/tools/ci_build/github/linux/python/requirements.txt
@@ -1,5 +1,5 @@
-numpy==2.2.6; python_version < "3.14"
-numpy==2.3.2; python_version >= "3.14"
+numpy==2.2.6; python_version < "3.11"
+numpy==2.4.2; python_version >= "3.11"
 mypy
 pytest
 setuptools>=68.2.2
@@ -8,8 +8,8 @@ protobuf==6.33.5
 sympy==1.14
 flatbuffers
 psutil
-onnxscript==0.5.3; python_version < "3.14"
-onnx-ir==0.1.10; python_version < "3.14"
+onnxscript==0.6.2
+onnx-ir==0.1.16
 jinja2
 markupsafe
-onnx==1.20.1; python_version < "3.14"
+onnx==1.20.1
diff --git a/tools/ci_build/github/windows/python/requirements.txt b/tools/ci_build/github/windows/python/requirements.txt
index a86eef170bc25..83593ff47e453 100644
--- a/tools/ci_build/github/windows/python/requirements.txt
+++ b/tools/ci_build/github/windows/python/requirements.txt
@@ -1,5 +1,5 @@
-numpy==2.2.6; python_version < "3.14"
-numpy==2.3.2; python_version >= "3.14"
+numpy==2.2.6; python_version < "3.11"
+numpy==2.4.2; python_version >= "3.11"
 mypy
 pytest
 setuptools>=68.2.2
@@ -8,10 +8,10 @@ protobuf==6.33.0
 sympy==1.14
 flatbuffers
 psutil
-onnxscript==0.5.3; python_version < "3.14"
-onnx-ir==0.1.10; python_version < "3.14"
+onnxscript==0.6.2
+onnx-ir==0.1.16
 jinja2
 markupsafe
 semver
 packaging
-onnx==1.20.1; python_version < "3.14"
+onnx==1.20.1
diff --git a/tools/ci_build/requirements/transformers-test/requirements.txt b/tools/ci_build/requirements/transformers-test/requirements.txt
index 1523b420bfdbd..c764225dbc98d 100644
--- a/tools/ci_build/requirements/transformers-test/requirements.txt
+++ b/tools/ci_build/requirements/transformers-test/requirements.txt
@@ -2,14 +2,14 @@
 packaging
 # protobuf and numpy is same as tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt
 protobuf==6.33.0
-numpy==2.2.6; python_version < "3.14"
-numpy==2.3.2; python_version >= "3.14"
-torch==2.8.0
-torchvision==0.23.0
+numpy==2.2.6; python_version < "3.11"
+numpy==2.4.2; python_version >= "3.11"
+torch==2.10.0
+torchvision==0.25.0
 transformers==4.52.1
 parameterized>=0.8.1
 sentencepiece
 psutil
 einops
-onnxscript==0.5.3; python_version < "3.14"
-onnx-ir==0.1.10; python_version < "3.14"
+onnxscript==0.6.2
+onnx-ir==0.1.16