Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/macos-ci-setup/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ inputs:
python_version:
required: false
type: string
default: "3.11"
default: "3.14"
node_version:
required: false
type: string
Expand Down
17 changes: 16 additions & 1 deletion .github/workflows/linux_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,21 @@ jobs:
secrets:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

build-linux-x64-release-py314:
name: Build Linux x64 Release (Python 3.14)
uses: ./.github/workflows/reusable_linux_build.yml
with:
pool_name: "onnxruntime-github-Ubuntu2204-AMD-CPU"
build_config: Release
architecture: x64
dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu
docker_image_repo: onnxruntimecpubuildpythonx64
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --build_nuget --enable_transformers_tool_test --cmake_extra_defines onnxruntime_BUILD_BENCHMARKS=ON'
python_path_prefix: 'PATH=/opt/python/cp314-cp314/bin:$PATH' # $ needs escaping in single quotes
job_identifier: build-linux-x64-release-py314
secrets:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

orttraining-linux-ci-pipeline:
name: Build Linux x64 Release with training
uses: ./.github/workflows/reusable_linux_build.yml
Expand Down Expand Up @@ -109,7 +124,7 @@ jobs:
dockerfile_path: tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile
docker_image_repo: onnxruntimecpubuildpythonaarch64
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cmake_extra_defines onnxruntime_BUILD_BENCHMARKS=ON'
python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH' # $ needs escaping in single quotes
python_path_prefix: 'PATH=/opt/python/cp314-cp314/bin:$PATH' # $ needs escaping in single quotes
job_identifier: build-linux-arm64-release
secrets:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
6 changes: 5 additions & 1 deletion .github/workflows/mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ concurrency:
cancel-in-progress: true

env:
python_version: 3.11
python_version: "3.14"

jobs:
cpu:
Expand All @@ -28,6 +28,7 @@ jobs:
{"machine": "arm64", "target": "arm64", "build_config": "Debug"},
{"machine": "arm64", "target": "arm64", "build_config": "Release"}
]
python_version: "3.14"

coreml:
uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
Expand All @@ -39,6 +40,7 @@ jobs:
{"machine": "arm64", "target": "arm64", "build_config": "Debug"},
{"machine": "arm64", "target": "arm64", "build_config": "Release"}
]
python_version: "3.14"

xnnpack:
uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
Expand All @@ -49,6 +51,7 @@ jobs:
[
{"machine": "arm64", "target": "arm64", "build_config": "Debug"}
]
python_version: "3.14"

webgpu:
uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
Expand All @@ -60,6 +63,7 @@ jobs:
{"machine": "arm64", "target": "arm64", "build_config": "Debug"},
{"machine": "arm64", "target": "arm64", "build_config": "Release"}
]
python_version: "3.14"

iphone_simulator:
runs-on: macos-15
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/macos-ci-build-and-test-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ on:
python_version:
required: false
type: string
default: "3.11"
default: "3.14"
matrix_include:
required: false
type: string
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/windows_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:

- uses: actions/setup-python@v6
with:
python-version: '3.12'
python-version: '3.14'
architecture: x64

- name: Locate vcvarsall and Setup Env
Expand Down Expand Up @@ -173,7 +173,7 @@ jobs:

- uses: actions/setup-python@v6
with:
python-version: '3.12'
python-version: '3.14'
architecture: x64

- uses: actions/setup-node@v6
Expand Down
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/downlo
protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef
psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.6.zip;f780292da9db273c8ef06ccf5fd4b623624143e9
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v3.0.2.zip;a064e663b4d7a337ac291d1bef7337ef4e60a1ae
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/403d652dca4c1046e8145950b1c0997a9f748b57.zip;30b2a07fe4bae8574f89176e56274cacdd6d135b
re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
Expand Down
3 changes: 1 addition & 2 deletions cmake/external/pybind11.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ onnxruntime_fetchcontent_declare(
URL ${DEP_URL_pybind11}
URL_HASH SHA1=${DEP_SHA1_pybind11}
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS 2.13 NAMES pybind11
FIND_PACKAGE_ARGS 3.0 NAMES pybind11
)
onnxruntime_fetchcontent_makeavailable(pybind11_project)

3 changes: 2 additions & 1 deletion cmake/vcpkg-ports/pybind11/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO pybind/pybind11
REF "v${VERSION}"
SHA512 497c25b33b09a9c42f67131ab82e35d689e8ce089dd7639be997305ff9a6d502447b79c824508c455d559e61f0186335b54dd2771d903a7c1621833930622d1a
# SHA512 for the zip (not tar.gz) file.
SHA512 786b1bf534ac67a8d5669f8babf67bb13e48b3a3da1b6344e43ae10a84b80bbc8fea5f12a65fd18739c341fefef5622c5dc096db964dff33cc62ea4259b2e2c1
HEAD_REF master
)

Expand Down
2 changes: 1 addition & 1 deletion cmake/vcpkg-ports/pybind11/vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "pybind11",
"version": "2.13.6",
"version": "3.0.2",
"description": "pybind11 is a lightweight header-only library that exposes C++ types in Python and vice versa, mainly to create Python bindings of existing C++ code",
"homepage": "https://github.com/pybind/pybind11",
"license": "BSD-3-Clause",
Expand Down
30 changes: 19 additions & 11 deletions onnxruntime/python/tools/pytorch_export_contrib_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
PyTorch-ONNX exporter (torch.onnx.export).
"""

import contextlib
import typing

try:
Expand All @@ -22,7 +23,7 @@
_registered_ops: typing.AbstractSet[str] = set()


def _reg(symbolic_fn: typing.Callable, namespace: str = ""):
def _reg(symbolic_fn: typing.Callable, namespace: str = "aten"):
name = f"{namespace}::{symbolic_fn.__name__}"
torch.onnx.register_custom_op_symbolic(name, symbolic_fn, _OPSET_VERSION)
_registered_ops.add(name)
Expand All @@ -49,13 +50,6 @@ def grid_sampler(g, input, grid, mode, padding_mode, align_corners):
padding_mode_str = ["zeros", "border", "reflection"][padding_mode]
align_corners = int(symbolic_helper._maybe_get_const(align_corners, "b"))

# From opset v13 onward, the output shape can be specified with
# (N, C, H, W) (N, H_out, W_out, 2) => (N, C, H_out, W_out)
# input_shape = input.type().sizes()
# gird_shape = grid.type().sizes()
# output_shape = input_shape[:2] + gird_shape[1:3]
# g.op(...).setType(input.type().with_sizes(output_shape))

return g.op(
"com.microsoft::GridSample",
input,
Expand All @@ -71,15 +65,24 @@ def inverse(g, self):
return g.op("com.microsoft::Inverse", self).setType(self.type())

_reg(inverse)
torch.onnx.register_custom_op_symbolic("aten::linalg_inv", inverse, _OPSET_VERSION)
_registered_ops.add("aten::linalg_inv")

def gelu(g, self: torch._C.Value, approximate="none"):
# PyTorch can emit aten::gelu with or without the optional approximate arg.
if not isinstance(approximate, str):
approximate = symbolic_helper._maybe_get_const(approximate, "s")

@torch.onnx.symbolic_helper.parse_args("v", "s")
def gelu(g, self: torch._C.Value, approximate: str = "none"):
# Use microsoft::Gelu for performance if possible. It only supports approximate == "none"
# Use microsoft::Gelu for performance if possible. It only supports approximate == "none".
if approximate == "none":
return g.op("com.microsoft::Gelu", self).setType(self.type())
return torch.onnx.symbolic_opset9.gelu(g, self, approximate)

_reg(gelu)
# Some PyTorch versions dispatch GELU symbolic lookup by exporter opset.
# Registering across stable opsets keeps ORT Gelu fusion consistently enabled.
for opset in range(9, 21):
torch.onnx.register_custom_op_symbolic("aten::gelu", gelu, opset)

def triu(g, self, diagonal):
return g.op("com.microsoft::Trilu", self, diagonal, upper_i=1).setType(self.type())
Expand Down Expand Up @@ -127,3 +130,8 @@ def unregister():
for version in symbolic_helper._onnx_stable_opsets:
if version >= _OPSET_VERSION and symbolic_registry.is_registered_op(kind, namespace, version):
del symbolic_registry._registry[(namespace, version)][kind]

# Also clean up gelu's multi-opset registrations (see register()).
for opset in range(9, 21):
with contextlib.suppress(Exception):
torch.onnx.unregister_custom_op_symbolic("aten::gelu", opset)
6 changes: 3 additions & 3 deletions onnxruntime/python/tools/transformers/fusion_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,11 +1112,11 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node):
if (
(mul_val is None)
or not (isinstance(mul_val, np.ndarray) and mul_val.size == 1)
or (float(mul_val) >= 0)
or (mul_val.item() >= 0)
):
return
if float(mul_val) != -10000:
self.mask_filter_value = float(mul_val)
if mul_val.item() != -10000:
self.mask_filter_value = mul_val.item()

if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_k.input[0] == root_input:
mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) if not is_no_mask_attention else None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ def do_export_internal(model: nn.Module, onnx_io_tuple: tuple, onnx_inputs: tupl
input_names=onnx_inp_names,
output_names=onnx_out_names,
dynamic_axes=onnx_dynamic_axes,
dynamo=False,
)

onnx_path.unlink(missing_ok=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def export_onnx(
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
opset_version=11,
opset_version=14,
do_constant_folding=True,
use_external_data_format=True,
verbose=verbose,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ def run_torchscript_separate_export(
opset_version=torch_export_onnx_opset_version,
do_constant_folding=True,
verbose=args.verbose,
dynamo=False,
)

# Check decoder_model.onnx and save all external data to one file
Expand Down Expand Up @@ -294,6 +295,7 @@ def run_torchscript_separate_export(
opset_version=torch_export_onnx_opset_version,
do_constant_folding=True,
verbose=args.verbose,
dynamo=False,
)

# Check decoder_with_past_model.onnx and save all external data to one file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,9 @@ def export_onnx(
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
opset_version=17,
opset_version=18,
do_constant_folding=True,
dynamo=False,
verbose=verbose,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,9 @@ def export_onnx(
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
opset_version=17,
opset_version=18,
do_constant_folding=True,
dynamo=False,
verbose=verbose,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,9 @@ def export_onnx(
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
opset_version=17,
opset_version=18,
do_constant_folding=True,
dynamo=False,
verbose=verbose,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def torch_onnx_export(
keep_initializers_as_inputs=keep_initializers_as_inputs,
custom_opsets=custom_opsets,
export_modules_as_functions=export_modules_as_functions,
dynamo=False,
)
else:
torch.onnx.export(
Expand Down
15 changes: 10 additions & 5 deletions onnxruntime/test/python/test_pytorch_export_contrib_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def setUp(self):
torch.manual_seed(0)
pytorch_export_contrib_ops.register()

def tearDown(self):
pytorch_export_contrib_ops.unregister()

def run_test(
self,
model,
Expand Down Expand Up @@ -101,6 +104,7 @@ def run_test(
input_names=input_names,
output_names=output_names,
custom_opsets=custom_opsets,
dynamo=False,
)

# compute onnxruntime output prediction
Expand Down Expand Up @@ -143,12 +147,13 @@ def test_gelu_is_fused_by_default(self):
f,
opset_version=self.opset_version,
custom_opsets={"com.microsoft": 1},
dynamo=False,
)
f.seek(0)
onnx_model = onnx.load(f)
node = onnx_model.graph.node[0]
self.assertEqual(node.op_type, "Gelu")
self.assertEqual(node.domain, "com.microsoft")
# Default GELU should be mapped to ORT contrib Gelu for performance.
gelu_nodes = [n for n in onnx_model.graph.node if n.op_type == "Gelu" and n.domain == "com.microsoft"]
self.assertEqual(len(gelu_nodes), 1)

@parameterized.parameterized.expand([("default_approximate", "none"), ("tanh_approximate", "tanh")])
@unittest.skipIf(_torch_version_lower_than("1.12"), "Gelu's approximate parameter unsupported in PyTorch < 1.12")
Expand Down Expand Up @@ -230,8 +235,8 @@ def forward(self, input):
# IR version 4 style export.
ONNXExporterTest_opset9_IRv4 = type(
"TestONNXRuntime_opset9_IRv4",
(unittest.TestCase,),
dict(ONNXExporterTest.__dict__, keep_initializers_as_inputs=False),
(ONNXExporterTest,),
dict(keep_initializers_as_inputs=False),
)


Expand Down
1 change: 1 addition & 0 deletions onnxruntime/test/python/transformers/parity_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def export_onnx(model, onnx_model_path, float16, hidden_size, device):
dynamic_axes=dynamic_axes,
opset_version=11,
do_constant_folding=True,
dynamo=False,
)
print("exported:", onnx_model_path)

Expand Down
7 changes: 6 additions & 1 deletion onnxruntime/test/python/transformers/test_gelu_fusions.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,22 @@ def test_fusions(self, test_case, dynamo):
dummy_input = torch.ones(3, dtype=torch.float32)
test_name = f"{operator}_{source}"
onnx_path = f"{test_name}.onnx"

# For Torch 2.10+, torch.nn.functional.gelu(approximate="tanh") exports as Gelu node.
# So we force opset_version=18 here.
torch.onnx.export(
model,
(dummy_input,),
onnx_path,
input_names=["input"],
output_names=["output"],
dynamo=dynamo,
opset_version=18,
dynamo=False,
optimize=True, # Only meaningful when dynamo is True
)
optimizer = optimize_model(onnx_path, "bert")
# optimizer.save_model_to_file(f"{operator}_{source}_opt.onnx")

os.remove(onnx_path)
# Remove the associated .data file (dynamo)
data_path = onnx_path + ".data"
Expand Down
Loading
Loading