diff --git a/.github/actions/macos-ci-setup/action.yml b/.github/actions/macos-ci-setup/action.yml index 0d60eeae8aee3..054676d301820 100644 --- a/.github/actions/macos-ci-setup/action.yml +++ b/.github/actions/macos-ci-setup/action.yml @@ -8,7 +8,7 @@ inputs: python_version: required: false type: string - default: "3.11" + default: "3.14" node_version: required: false type: string diff --git a/.github/workflows/linux_ci.yml b/.github/workflows/linux_ci.yml index f4d110e9bc2df..8801042492ecb 100644 --- a/.github/workflows/linux_ci.yml +++ b/.github/workflows/linux_ci.yml @@ -68,6 +68,21 @@ jobs: secrets: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + build-linux-x64-release-py314: + name: Build Linux x64 Release (Python 3.14) + uses: ./.github/workflows/reusable_linux_build.yml + with: + pool_name: "onnxruntime-github-Ubuntu2204-AMD-CPU" + build_config: Release + architecture: x64 + dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu + docker_image_repo: onnxruntimecpubuildpythonx64 + extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --build_nuget --enable_transformers_tool_test --cmake_extra_defines onnxruntime_BUILD_BENCHMARKS=ON' + python_path_prefix: 'PATH=/opt/python/cp314-cp314/bin:$PATH' # $ needs escaping in single quotes + job_identifier: build-linux-x64-release-py314 + secrets: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + orttraining-linux-ci-pipeline: name: Build Linux x64 Release with training uses: ./.github/workflows/reusable_linux_build.yml @@ -109,7 +124,7 @@ jobs: dockerfile_path: tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile docker_image_repo: onnxruntimecpubuildpythonaarch64 extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cmake_extra_defines onnxruntime_BUILD_BENCHMARKS=ON' - python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH' # $ needs escaping in single quotes + python_path_prefix: 'PATH=/opt/python/cp314-cp314/bin:$PATH' # $ needs escaping in single quotes job_identifier: build-linux-arm64-release secrets: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 32e126c34ac39..0f8b4a42f48ae 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -16,7 +16,7 @@ concurrency: cancel-in-progress: true env: - python_version: 3.11 + python_version: "3.14" jobs: cpu: @@ -28,6 +28,7 @@ jobs: {"machine": "arm64", "target": "arm64", "build_config": "Debug"}, {"machine": "arm64", "target": "arm64", "build_config": "Release"} ] + python_version: "3.14" coreml: uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml @@ -39,6 +40,7 @@ jobs: {"machine": "arm64", "target": "arm64", "build_config": "Debug"}, {"machine": "arm64", "target": "arm64", "build_config": "Release"} ] + python_version: "3.14" xnnpack: uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml @@ -49,6 +51,7 @@ jobs: [ {"machine": "arm64", "target": "arm64", "build_config": "Debug"} ] + python_version: "3.14" webgpu: uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml @@ -60,6 +63,7 @@ jobs: {"machine": "arm64", "target": "arm64", "build_config": "Debug"}, {"machine": "arm64", "target": "arm64", "build_config": "Release"} ] + python_version: "3.14" iphone_simulator: runs-on: macos-15 diff --git a/.github/workflows/macos-ci-build-and-test-workflow.yml b/.github/workflows/macos-ci-build-and-test-workflow.yml index 75002fdf12c00..76198c7f5c1ce 100644 --- a/.github/workflows/macos-ci-build-and-test-workflow.yml +++ b/.github/workflows/macos-ci-build-and-test-workflow.yml @@ -19,7 +19,7 @@ on: python_version: required: false type: string - default: "3.11" + default: "3.14" matrix_include: required: false type: string diff --git a/.github/workflows/windows_cuda.yml b/.github/workflows/windows_cuda.yml index 0eb7f51cec986..852d0164083c4 100644 --- a/.github/workflows/windows_cuda.yml +++ b/.github/workflows/windows_cuda.yml @@ -32,7 +32,7 @@ jobs: - uses: actions/setup-python@v6 with: - python-version: '3.12' + python-version: '3.14' architecture: x64 - name: Locate vcvarsall and Setup Env @@ -173,7 +173,7 @@ jobs: - uses: actions/setup-python@v6 with: - python-version: '3.12' + python-version: '3.14' architecture: x64 - uses: actions/setup-node@v6 diff --git a/cmake/deps.txt b/cmake/deps.txt index 578dd8fd23d09..65c74060e8deb 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -46,7 +46,7 @@ protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/downlo protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013 pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2 -pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.6.zip;f780292da9db273c8ef06ccf5fd4b623624143e9 +pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v3.0.2.zip;a064e663b4d7a337ac291d1bef7337ef4e60a1ae pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/403d652dca4c1046e8145950b1c0997a9f748b57.zip;30b2a07fe4bae8574f89176e56274cacdd6d135b re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88 safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake index 79280c97a899e..ba14667bc3c88 100644 --- a/cmake/external/pybind11.cmake +++ b/cmake/external/pybind11.cmake @@ -6,7 +6,6 @@ onnxruntime_fetchcontent_declare( URL ${DEP_URL_pybind11} URL_HASH SHA1=${DEP_SHA1_pybind11} EXCLUDE_FROM_ALL - FIND_PACKAGE_ARGS 2.13 NAMES pybind11 + FIND_PACKAGE_ARGS 3.0 NAMES pybind11 ) onnxruntime_fetchcontent_makeavailable(pybind11_project) - diff --git a/cmake/vcpkg-ports/pybind11/portfile.cmake b/cmake/vcpkg-ports/pybind11/portfile.cmake index 2c63582d1ee15..4e4cd30a26df1 100644 --- a/cmake/vcpkg-ports/pybind11/portfile.cmake +++ b/cmake/vcpkg-ports/pybind11/portfile.cmake @@ -2,7 +2,8 @@ vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO pybind/pybind11 REF "v${VERSION}" - SHA512 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a + # SHA512 for the zip (not tar.gz) file. + SHA512 786b1bf534ac67a8d5669f8babf67bb13e48b3a3da1b6344e43ae10a84b80bbc8fea5f12a65fd18739c341fefef5622c5dc096db964dff33cc62ea4259b2e2c1 HEAD_REF master ) diff --git a/cmake/vcpkg-ports/pybind11/vcpkg.json b/cmake/vcpkg-ports/pybind11/vcpkg.json index a730d32017885..058e2235fea08 100644 --- a/cmake/vcpkg-ports/pybind11/vcpkg.json +++ b/cmake/vcpkg-ports/pybind11/vcpkg.json @@ -1,6 +1,6 @@ { "name": "pybind11", - "version": "2.13.6", + "version": "3.0.2", "description": "pybind11 is a lightweight header-only library that exposes C++ types in Python and vice versa, mainly to create Python bindings of existing C++ code", "homepage": "https://github.com/pybind/pybind11", "license": "BSD-3-Clause", diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py index 1c5e31af99d82..0bd75e5c92e4c 100644 --- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py +++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py @@ -6,6 +6,7 @@ PyTorch-ONNX exporter (torch.onnx.export). """ +import contextlib import typing try: @@ -22,7 +23,7 @@ _registered_ops: typing.AbstractSet[str] = set() -def _reg(symbolic_fn: typing.Callable, namespace: str = ""): +def _reg(symbolic_fn: typing.Callable, namespace: str = "aten"): name = f"{namespace}::{symbolic_fn.__name__}" torch.onnx.register_custom_op_symbolic(name, symbolic_fn, _OPSET_VERSION) _registered_ops.add(name) @@ -49,13 +50,6 @@ def grid_sampler(g, input, grid, mode, padding_mode, align_corners): padding_mode_str = ["zeros", "border", "reflection"][padding_mode] align_corners = int(symbolic_helper._maybe_get_const(align_corners, "b")) - # From opset v13 onward, the output shape can be specified with - # (N, C, H, W) (N, H_out, W_out, 2) => (N, C, H_out, W_out) - # input_shape = input.type().sizes() - # gird_shape = grid.type().sizes() - # output_shape = input_shape[:2] + gird_shape[1:3] - # g.op(...).setType(input.type().with_sizes(output_shape)) - return g.op( "com.microsoft::GridSample", input, @@ -71,15 +65,24 @@ def inverse(g, self): return g.op("com.microsoft::Inverse", self).setType(self.type()) _reg(inverse) + torch.onnx.register_custom_op_symbolic("aten::linalg_inv", inverse, _OPSET_VERSION) + _registered_ops.add("aten::linalg_inv") + + def gelu(g, self: torch._C.Value, approximate="none"): + # PyTorch can emit aten::gelu with or without the optional approximate arg. + if not isinstance(approximate, str): + approximate = symbolic_helper._maybe_get_const(approximate, "s") - @torch.onnx.symbolic_helper.parse_args("v", "s") - def gelu(g, self: torch._C.Value, approximate: str = "none"): - # Use microsoft::Gelu for performance if possible. It only supports approximate == "none" + # Use microsoft::Gelu for performance if possible. It only supports approximate == "none". if approximate == "none": return g.op("com.microsoft::Gelu", self).setType(self.type()) return torch.onnx.symbolic_opset9.gelu(g, self, approximate) _reg(gelu) + # Some PyTorch versions dispatch GELU symbolic lookup by exporter opset. + # Registering across stable opsets keeps ORT Gelu fusion consistently enabled. + for opset in range(9, 21): + torch.onnx.register_custom_op_symbolic("aten::gelu", gelu, opset) def triu(g, self, diagonal): return g.op("com.microsoft::Trilu", self, diagonal, upper_i=1).setType(self.type()) @@ -127,3 +130,8 @@ def unregister(): for version in symbolic_helper._onnx_stable_opsets: if version >= _OPSET_VERSION and symbolic_registry.is_registered_op(kind, namespace, version): del symbolic_registry._registry[(namespace, version)][kind] + + # Also clean up gelu's multi-opset registrations (see register()). + for opset in range(9, 21): + with contextlib.suppress(Exception): + torch.onnx.unregister_custom_op_symbolic("aten::gelu", opset) diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py index 08f8691d8b2b5..de7f0a044c118 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_attention.py @@ -1112,11 +1112,11 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node): if ( (mul_val is None) or not (isinstance(mul_val, np.ndarray) and mul_val.size == 1) - or (float(mul_val) >= 0) + or (mul_val.item() >= 0) ): return - if float(mul_val) != -10000: - self.mask_filter_value = float(mul_val) + if mul_val.item() != -10000: + self.mask_filter_value = mul_val.item() if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_k.input[0] == root_input: mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) if not is_no_mask_attention else None diff --git a/onnxruntime/python/tools/transformers/large_model_exporter.py b/onnxruntime/python/tools/transformers/large_model_exporter.py index 29829a6c475d9..f4d9e28d4ecb2 100644 --- a/onnxruntime/python/tools/transformers/large_model_exporter.py +++ b/onnxruntime/python/tools/transformers/large_model_exporter.py @@ -290,6 +290,7 @@ def do_export_internal(model: nn.Module, onnx_io_tuple: tuple, onnx_inputs: tupl input_names=onnx_inp_names, output_names=onnx_out_names, dynamic_axes=onnx_dynamic_axes, + dynamo=False, ) onnx_path.unlink(missing_ok=True) diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py index b405c19b04689..0b86d5f038cd8 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py @@ -473,7 +473,7 @@ def export_onnx( input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, - opset_version=11, + opset_version=14, do_constant_folding=True, use_external_data_format=True, verbose=verbose, diff --git a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py index 2cb6a733c5bc7..17a4ef58914d6 100644 --- a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py @@ -235,6 +235,7 @@ def run_torchscript_separate_export( opset_version=torch_export_onnx_opset_version, do_constant_folding=True, verbose=args.verbose, + dynamo=False, ) # Check decoder_model.onnx and save all external data to one file @@ -294,6 +295,7 @@ def run_torchscript_separate_export( opset_version=torch_export_onnx_opset_version, do_constant_folding=True, verbose=args.verbose, + dynamo=False, ) # Check decoder_with_past_model.onnx and save all external data to one file diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py index e10e616d35d38..31fb60f86faf1 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py @@ -391,8 +391,9 @@ def export_onnx( input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, - opset_version=17, + opset_version=18, do_constant_folding=True, + dynamo=False, verbose=verbose, ) diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py index 851f641442016..48d4e12a38a43 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py @@ -110,8 +110,9 @@ def export_onnx( input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, - opset_version=17, + opset_version=18, do_constant_folding=True, + dynamo=False, verbose=verbose, ) diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py index cd81edc1001be..35ec59b2bca69 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py @@ -293,8 +293,9 @@ def export_onnx( input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, - opset_version=17, + opset_version=18, do_constant_folding=True, + dynamo=False, verbose=verbose, ) diff --git a/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py b/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py index 66f24c47f6cdb..a8c2ad1967acb 100644 --- a/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py +++ b/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py @@ -49,6 +49,7 @@ def torch_onnx_export( keep_initializers_as_inputs=keep_initializers_as_inputs, custom_opsets=custom_opsets, export_modules_as_functions=export_modules_as_functions, + dynamo=False, ) else: torch.onnx.export( diff --git a/onnxruntime/test/python/test_pytorch_export_contrib_ops.py b/onnxruntime/test/python/test_pytorch_export_contrib_ops.py index e7ea83dd00297..afefc4e616a87 100644 --- a/onnxruntime/test/python/test_pytorch_export_contrib_ops.py +++ b/onnxruntime/test/python/test_pytorch_export_contrib_ops.py @@ -59,6 +59,9 @@ def setUp(self): torch.manual_seed(0) pytorch_export_contrib_ops.register() + def tearDown(self): + pytorch_export_contrib_ops.unregister() + def run_test( self, model, @@ -101,6 +104,7 @@ def run_test( input_names=input_names, output_names=output_names, custom_opsets=custom_opsets, + dynamo=False, ) # compute onnxruntime output prediction @@ -143,12 +147,13 @@ def test_gelu_is_fused_by_default(self): f, opset_version=self.opset_version, custom_opsets={"com.microsoft": 1}, + dynamo=False, ) f.seek(0) onnx_model = onnx.load(f) - node = onnx_model.graph.node[0] - self.assertEqual(node.op_type, "Gelu") - self.assertEqual(node.domain, "com.microsoft") + # Default GELU should be mapped to ORT contrib Gelu for performance. + gelu_nodes = [n for n in onnx_model.graph.node if n.op_type == "Gelu" and n.domain == "com.microsoft"] + self.assertEqual(len(gelu_nodes), 1) @parameterized.parameterized.expand([("default_approximate", "none"), ("tanh_approximate", "tanh")]) @unittest.skipIf(_torch_version_lower_than("1.12"), "Gelu's approximate parameter unsupported in PyTorch < 1.12") @@ -230,8 +235,8 @@ def forward(self, input): # IR version 4 style export. ONNXExporterTest_opset9_IRv4 = type( "TestONNXRuntime_opset9_IRv4", - (unittest.TestCase,), - dict(ONNXExporterTest.__dict__, keep_initializers_as_inputs=False), + (ONNXExporterTest,), + dict(keep_initializers_as_inputs=False), ) diff --git a/onnxruntime/test/python/transformers/parity_utilities.py b/onnxruntime/test/python/transformers/parity_utilities.py index fa16f0e67a523..04a1ed06773e7 100644 --- a/onnxruntime/test/python/transformers/parity_utilities.py +++ b/onnxruntime/test/python/transformers/parity_utilities.py @@ -92,6 +92,7 @@ def export_onnx(model, onnx_model_path, float16, hidden_size, device): dynamic_axes=dynamic_axes, opset_version=11, do_constant_folding=True, + dynamo=False, ) print("exported:", onnx_model_path) diff --git a/onnxruntime/test/python/transformers/test_gelu_fusions.py b/onnxruntime/test/python/transformers/test_gelu_fusions.py index 11ae1401ff8ed..a63e2653f2fbc 100644 --- a/onnxruntime/test/python/transformers/test_gelu_fusions.py +++ b/onnxruntime/test/python/transformers/test_gelu_fusions.py @@ -75,17 +75,22 @@ def test_fusions(self, test_case, dynamo): dummy_input = torch.ones(3, dtype=torch.float32) test_name = f"{operator}_{source}" onnx_path = f"{test_name}.onnx" + + # For Torch 2.10+, torch.nn.functional.gelu(approximate="tanh") exports as Gelu node. + # So we force opset_version=18 here. torch.onnx.export( model, (dummy_input,), onnx_path, input_names=["input"], output_names=["output"], - dynamo=dynamo, + opset_version=18, + dynamo=False, optimize=True, # Only meaningful when dynamo is True ) optimizer = optimize_model(onnx_path, "bert") # optimizer.save_model_to_file(f"{operator}_{source}_opt.onnx") + os.remove(onnx_path) # Remove the associated .data file (dynamo) data_path = onnx_path + ".data" diff --git a/onnxruntime/test/python/transformers/test_gqa.py b/onnxruntime/test/python/transformers/test_gqa.py index 6def1be804743..5ff0572c927c6 100644 --- a/onnxruntime/test/python/transformers/test_gqa.py +++ b/onnxruntime/test/python/transformers/test_gqa.py @@ -1967,7 +1967,7 @@ def has_flash_attention(bf16=False): def has_quantized_kv_cache(): - return version.parse(ort_version) >= version.parse("1.24.0") + return version.parse(ort_version) >= version.parse("1.25.0") @unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.") @@ -2069,6 +2069,7 @@ def test_gqa_past_flash_attention_bf16(self, name, config): @unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.") +@unittest.skipIf(not has_quantized_kv_cache(), "Quantized KV Cache is not available, skipping tests.") class TestFlashGQABF16QuantizedKV(unittest.TestCase): def manual_seed(self): # Reset random seeds before each test to ensure test isolation @@ -2301,6 +2302,7 @@ def gqa_xqa_test_cases(): @unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.") +@unittest.skipIf(not has_quantized_kv_cache(), "Quantized KV Cache is not available, skipping tests.") class TestXQAQuantizedParity(unittest.TestCase): """Tests that verify fused kernels produce the same results as unfused kernels.""" @@ -2330,6 +2332,7 @@ def test_xqa_quantized_parity(self, name, config, torch_type, ort_type): @unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.") +@unittest.skipIf(not has_quantized_kv_cache(), "Quantized KV Cache is not available, skipping tests.") class TestGQARegressions(unittest.TestCase): """Specific regression tests for historical bugs.""" diff --git a/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py b/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py index 444d86da75ba6..c07eb39e6df75 100644 --- a/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py +++ b/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py @@ -253,6 +253,7 @@ def export_onnx(model, onnx_model_path, float16, hidden_size, num_attention_head dynamic_axes=dynamic_axes, opset_version=11, do_constant_folding=True, + dynamo=False, ) print("exported:", onnx_model_path) diff --git a/onnxruntime/test/python/transformers/test_phi_vision.py b/onnxruntime/test/python/transformers/test_phi_vision.py index d276366706af9..5a5fa926eb255 100644 --- a/onnxruntime/test/python/transformers/test_phi_vision.py +++ b/onnxruntime/test/python/transformers/test_phi_vision.py @@ -208,6 +208,7 @@ def export(self, model, inputs): "input": {0: "batch", 1: "seq"}, "attention_mask": {0: "batch", 2: "seq", 3: "seq"}, }, + dynamo=False, ) else: torch.onnx.export( @@ -217,6 +218,7 @@ def export(self, model, inputs): export_params=True, opset_version=14, do_constant_folding=True, + dynamo=False, ) def tearDown(self): diff --git a/onnxruntime/test/python/transformers/test_whisper.py b/onnxruntime/test/python/transformers/test_whisper.py index e3ca8e6b6ac9c..e90a14f8d7d61 100644 --- a/onnxruntime/test/python/transformers/test_whisper.py +++ b/onnxruntime/test/python/transformers/test_whisper.py @@ -471,8 +471,9 @@ def export(self, model, inputs, input_names, output_names, dynamic_axes): input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, - opset_version=17, + opset_version=18, do_constant_folding=True, + dynamo=False, verbose=False, ) @@ -530,9 +531,7 @@ def test_hf_whisper_encoder_self_attention(self, precision, ep): use_gpu=True, only_onnxruntime=False, ) - name = f"hf_{precision}_encoder_self_attention.onnx" - # optimized_model.save_model_to_file(name) # Uncomment for debugging purposes - self.verify_fusion(optimized_model, name) + self.verify_fusion(optimized_model, f"hf_{precision}_encoder_self_attention.onnx") @parameterized.expand( [ diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 146b3ae45de19..038ef04db2256 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1756,7 +1756,7 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs): # Install cpu only version of torch when cuda is not enabled in Linux. extra = [] if args.use_cuda and is_linux() else ["--index-url", "https://download.pytorch.org/whl/cpu"] run_subprocess( - [sys.executable, "-m", "pip", "install", "torch==2.8.0", "torchvision==0.23.0", *extra], + [sys.executable, "-m", "pip", "install", "torch==2.10.0", "torchvision==0.25.0", *extra], cwd=cwd, dll_path=dll_path, python_path=python_path, @@ -1833,11 +1833,9 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs): [sys.executable, "-m", "unittest", "discover", "-s", "quantization"], cwd=cwd, dll_path=dll_path ) - # onnx package does not support python 3.14 yet so skip the transformers tests for python 3.14. - # we can remove this check when onnx package supports python 3.14. if args.enable_transformers_tool_test and (sys.version_info.major, sys.version_info.minor) < ( 3, - 14, + 15, ): import google.protobuf # noqa: PLC0415 import numpy # noqa: PLC0415 diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml index b634cbf1c287d..381861d2b327a 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml @@ -183,8 +183,6 @@ stages: - stage: Win_py_${{ parameters.EP_NAME }}_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Tests dependsOn: Win_py_${{ parameters.EP_NAME }}_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Build - # Skip this stage for Python 3.14 for now until onnx package support python 3.14. - condition: and(succeeded(), ne('${{ parameters.PYTHON_VERSION }}', '3.14')) jobs: - job: Win_py_${{ parameters.EP_NAME }}_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Tests workspace: diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-webgpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-webgpu-stage.yml index d7437a66701f5..0d3357319dd53 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-win-webgpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-win-webgpu-stage.yml @@ -138,8 +138,6 @@ stages: - stage: Win_py_webgpu_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Tests dependsOn: Win_py_webgpu_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Build - # Skip this stage for Python 3.14 for now until onnx package support python 3.14. - condition: and(succeeded(), ne('${{ parameters.PYTHON_VERSION }}', '3.14')) jobs: - job: Win_py_webgpu_Wheels_${{ replace(parameters.PYTHON_VERSION,'.','_') }}_Tests workspace: diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-cpu.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-cpu.yml index 09603f2350657..326cfd7829f2f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-win-cpu.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-win-cpu.yml @@ -149,20 +149,18 @@ jobs: - powershell: | - if ("$(PythonVersion)" -notcontains "3.14") { - python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq - Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} - Remove-Item -Recurse -Force onnxruntime - if ("$(ExtraParam)".Split() -contains "--use_azure") { - - if( "${{parameters.architecture}}" -eq 'arm64') { - $env:path="$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\arm64-windows\bin;$env:path" - } else { - $env:path="$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\x64-windows\bin;$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\x86-windows\bin;$env:path" - } - python onnxruntime_test_python_azure.py + python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq + Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} + Remove-Item -Recurse -Force onnxruntime + if ("$(ExtraParam)".Split() -contains "--use_azure") { + + if( "${{parameters.architecture}}" -eq 'arm64') { + $env:path="$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\arm64-windows\bin;$env:path" + } else { + $env:path="$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\x64-windows\bin;$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\_deps\vcpkg-src\installed\x86-windows\bin;$env:path" } - python onnx_backend_test_series.py + python onnxruntime_test_python_azure.py } + python onnx_backend_test_series.py workingDirectory: '$(Build.SourcesDirectory)\build\${{ parameters.cmake_build_type }}\${{ parameters.cmake_build_type }}' displayName: 'Run Python Tests' diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt index 42bee7a892b11..7e2b6e74cfdde 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt @@ -1,5 +1,5 @@ -numpy==2.2.6; python_version < "3.14" -numpy==2.3.2; python_version >= "3.14" +numpy==2.2.6; python_version < "3.11" +numpy==2.4.2; python_version >= "3.11" mypy pytest setuptools>=68.2.2 @@ -7,4 +7,4 @@ wheel protobuf==4.25.8 sympy==1.14 flatbuffers -onnx==1.20.1; python_version < "3.14" +onnx==1.20.1 diff --git a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt index c5fc16837e093..63a8e96d8c128 100644 --- a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt @@ -3,13 +3,13 @@ beartype==0.15.0 flatbuffers cerberus h5py -onnx==1.20.1; python_version < "3.14" +onnx==1.20.1 # Python dependencies required for pytorch development astunparse expecttest!=0.2.0 hypothesis -numpy==2.2.6; python_version < "3.14" -numpy==2.3.2; python_version >= "3.14" +numpy==2.2.6; python_version < "3.11" +numpy==2.4.2; python_version >= "3.11" psutil pyyaml requests diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index 8f5d0776501c0..ffcad5ee67208 100644 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -1,5 +1,5 @@ -numpy==2.2.6; python_version < "3.14" -numpy==2.3.2; python_version >= "3.14" +numpy==2.2.6; python_version < "3.11" +numpy==2.4.2; python_version >= "3.11" mypy pytest setuptools>=68.2.2 @@ -8,6 +8,5 @@ protobuf==6.33.0 sympy==1.14 flatbuffers neural-compressor>=2.2.1 -triton==3.2.0; python_version < "3.14" -triton==3.5.0; python_version >= "3.14" -onnx==1.20.1; python_version < "3.14" +triton==3.5.0 +onnx==1.20.1 diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt index 85a9c6391af80..ad57cc715589b 100644 --- a/tools/ci_build/github/linux/docker/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt @@ -1,6 +1,6 @@ cerberus -numpy==2.2.6; python_version < "3.14" -numpy==2.3.2; python_version >= "3.14" +numpy==2.2.6; python_version < "3.11" +numpy==2.4.2; python_version >= "3.11" mypy pytest setuptools==78.1.1 @@ -10,6 +10,6 @@ sympy==1.14 flatbuffers protobuf==6.33.0 packaging -onnxscript==0.5.3; python_version < "3.14" -onnx-ir==0.1.10; python_version < "3.14" -onnx==1.20.1; python_version < "3.14" +onnxscript==0.6.2 +onnx-ir==0.1.16 +onnx==1.20.1 diff --git a/tools/ci_build/github/linux/python/requirements.txt b/tools/ci_build/github/linux/python/requirements.txt index 77b6fd988fb19..994776e8fb6fd 100644 --- a/tools/ci_build/github/linux/python/requirements.txt +++ b/tools/ci_build/github/linux/python/requirements.txt @@ -1,5 +1,5 @@ -numpy==2.2.6; python_version < "3.14" -numpy==2.3.2; python_version >= "3.14" +numpy==2.2.6; python_version < "3.11" +numpy==2.4.2; python_version >= "3.11" mypy pytest setuptools>=68.2.2 @@ -8,8 +8,8 @@ protobuf==6.33.5 sympy==1.14 flatbuffers psutil -onnxscript==0.5.3; python_version < "3.14" -onnx-ir==0.1.10; python_version < "3.14" +onnxscript==0.6.2 +onnx-ir==0.1.16 jinja2 markupsafe -onnx==1.20.1; python_version < "3.14" +onnx==1.20.1 diff --git a/tools/ci_build/github/windows/python/requirements.txt b/tools/ci_build/github/windows/python/requirements.txt index a86eef170bc25..83593ff47e453 100644 --- a/tools/ci_build/github/windows/python/requirements.txt +++ b/tools/ci_build/github/windows/python/requirements.txt @@ -1,5 +1,5 @@ -numpy==2.2.6; python_version < "3.14" -numpy==2.3.2; python_version >= "3.14" +numpy==2.2.6; python_version < "3.11" +numpy==2.4.2; python_version >= "3.11" mypy pytest setuptools>=68.2.2 @@ -8,10 +8,10 @@ protobuf==6.33.0 sympy==1.14 flatbuffers psutil -onnxscript==0.5.3; python_version < "3.14" -onnx-ir==0.1.10; python_version < "3.14" +onnxscript==0.6.2 +onnx-ir==0.1.16 jinja2 markupsafe semver packaging -onnx==1.20.1; python_version < "3.14" +onnx==1.20.1 diff --git a/tools/ci_build/requirements/transformers-test/requirements.txt b/tools/ci_build/requirements/transformers-test/requirements.txt index 1523b420bfdbd..c764225dbc98d 100644 --- a/tools/ci_build/requirements/transformers-test/requirements.txt +++ b/tools/ci_build/requirements/transformers-test/requirements.txt @@ -2,14 +2,14 @@ packaging # protobuf and numpy is same as tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt protobuf==6.33.0 -numpy==2.2.6; python_version < "3.14" -numpy==2.3.2; python_version >= "3.14" -torch==2.8.0 -torchvision==0.23.0 +numpy==2.2.6; python_version < "3.11" +numpy==2.4.2; python_version >= "3.11" +torch==2.10.0 +torchvision==0.25.0 transformers==4.52.1 parameterized>=0.8.1 sentencepiece psutil einops -onnxscript==0.5.3; python_version < "3.14" -onnx-ir==0.1.10; python_version < "3.14" +onnxscript==0.6.2 +onnx-ir==0.1.16