onnxruntime/python/tools/symbolic_shape_infer.py

-Original file line number
+Diff line change
@@ Expand Up @@
                 "PackedMultiHeadAttention": self._infer_PackedMultiHeadAttention,
                 "PagedAttention": self._infer_PagedAttention,
                 "PythonOp": self._infer_PythonOp,
+                "QLinearAdd": self._infer_QLinearBinary,
+                "QLinearMul": self._infer_QLinearBinary,
                 "QuantizeLinear": self._infer_QuantizeLinear,
                 "QuickGelu": self._infer_FastGelu,
                 "RelativePositionBias": self._infer_RelativePositionBias,
@@ Expand Down Expand Up / @@ -490,6 +492,8 @@ def _onnx_infer_single_node(self, node): @@
                 "SkipSimplifiedLayerNormalization",
                 "SparseAttention",
                 "SkipGroupNorm",
+                "QLinearAdd",
+                "QLinearMul",
             ]
             if not skip_infer:
@@ Expand Down Expand Up / @@ -1040,6 +1044,20 @@ def _infer_QuantizeLinear(self, node): # noqa: N802 @@
             vi = self.known_vi_[node.output[0]]
             vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, output_shape))
+        def _infer_QLinearBinary(self, node):  # noqa: N802
+            # Get the output data type from the first input to QLinearAdd / QLinearMul.
+            output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+            # The inputs are first and fourth operands respectively.
+            input_1_shape = self._get_shape(node, 0)
+            input_2_shape = self._get_shape(node, 3)
+            # Compute the broadcasted shape
+            new_shape = self._broadcast_shapes(input_1_shape, input_2_shape)
+            vi = self.known_vi_[node.output[0]]
+            vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, new_shape))
         def _infer_Einsum(self, node):  # noqa: N802
             # ref:https://github.com/onnx/onnx/blob/623dfaa0151b2e4ce49779c3ec31cbd78c592b80/onnx/defs/math/defs.cc#L3275
             equation = get_attribute(node, "equation")
@@ Expand Down @@

onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -644,6 +644,87 @@ def test_matmulnbits(self): @@
             ]
             self._check_shapes(graph, inferred.graph, expected_shapes)
+        def test_qlinear_binary(self):
+            """
+            Test ONNX QLinearAdd op ('com.microsoft' domain). .
+            Check that the output shape is propagated from the inputs to the op with broadcasting.
+            """
+            initializers = [
+                helper.make_tensor(
+                    "A_scale",
+                    TensorProto.FLOAT,
+                    [],
+                    [0.7],
+                ),
+                helper.make_tensor(
+                    "A_zero_point",
+                    TensorProto.UINT8,
+                    [],
+                    [158],
+                ),
+                helper.make_tensor(
+                    "B_scale",
+                    TensorProto.FLOAT,
+                    [],
+                    [0.02],
+                ),
+                helper.make_tensor(
+                    "B_zero_point",
+                    TensorProto.UINT8,
+                    [],
+                    [5],
+                ),
+                helper.make_tensor(
+                    "C_scale",
+                    TensorProto.FLOAT,
+                    [],
+                    [0.26],
+                ),
+                helper.make_tensor(
+                    "C_zero_point",
+                    TensorProto.UINT8,
+                    [],
+                    [0],
+                ),
+            ]
+            nodes = [
+                helper.make_node(
+                    "QLinearAdd",
+                    inputs=[
+                        "A",
+                        "A_scale",
+                        "A_zero_point",
+                        "B",
+                        "B_scale",
+                        "B_zero_point",
+                        "C_scale",
+                        "C_zero_point",
+                    ],
+                    outputs=["C"],
+                    domain="com.microsoft",
+                ),
+            ]
+            inputs = [
+                helper.make_tensor_value_info("A", TensorProto.UINT8, ["b", 4, 128]),
+                helper.make_tensor_value_info("B", TensorProto.UINT8, ["b", 1, 4, 1, 128]),
+            ]
+            outputs = [
+                helper.make_tensor_value_info("C", TensorProto.UNDEFINED, None),
+            ]
+            graph = helper.make_graph(nodes, "QLinearAdd_Test", inputs, outputs, initializers)
+            model = helper.make_model(graph)
+            inferred = SymbolicShapeInference.infer_shapes(model, auto_merge=True)
+            expected_shapes = [
+                helper.make_tensor_value_info("C", TensorProto.UINT8, ["b", 1, 4, 4, 128]),
+            ]
+            self._check_shapes(graph, inferred.graph, expected_shapes)
     class TestSymbolicShapeInferenceForSlice(unittest.TestCase):
         def check_slice_of_concat(self, input_dims, start, end, step, expected_output_dim):
@@ Expand Down @@

[Shape Inference] Add shape inference for QLinearAdd and QLinearMul ops #24090

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

tianleiwu merged 2 commits into microsoft:main from pravg-amd:qlinear_binary_shape_infer

Mar 24, 2025

-Original file line number
+Diff line change
@@ Expand Up @@
                 "PackedMultiHeadAttention": self._infer_PackedMultiHeadAttention,
                 "PagedAttention": self._infer_PagedAttention,
                 "PythonOp": self._infer_PythonOp,
+                "QLinearAdd": self._infer_QLinearBinary,
+                "QLinearMul": self._infer_QLinearBinary,
                 "QuantizeLinear": self._infer_QuantizeLinear,
                 "QuickGelu": self._infer_FastGelu,
                 "RelativePositionBias": self._infer_RelativePositionBias,
@@ Expand Down Expand Up / @@ -490,6 +492,8 @@ def _onnx_infer_single_node(self, node): @@
                 "SkipSimplifiedLayerNormalization",
                 "SparseAttention",
                 "SkipGroupNorm",
+                "QLinearAdd",
+                "QLinearMul",
             ]
             if not skip_infer:
@@ Expand Down Expand Up / @@ -1040,6 +1044,20 @@ def _infer_QuantizeLinear(self, node): # noqa: N802 @@
             vi = self.known_vi_[node.output[0]]
             vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, output_shape))
+        def _infer_QLinearBinary(self, node):  # noqa: N802
+            # Get the output data type from the first input to QLinearAdd / QLinearMul.
+            output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+            # The inputs are first and fourth operands respectively.
+            input_1_shape = self._get_shape(node, 0)
+            input_2_shape = self._get_shape(node, 3)
+            # Compute the broadcasted shape
+            new_shape = self._broadcast_shapes(input_1_shape, input_2_shape)
+            vi = self.known_vi_[node.output[0]]
+            vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, new_shape))
         def _infer_Einsum(self, node):  # noqa: N802
             # ref:https://github.com/onnx/onnx/blob/623dfaa0151b2e4ce49779c3ec31cbd78c592b80/onnx/defs/math/defs.cc#L3275
             equation = get_attribute(node, "equation")
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -644,6 +644,87 @@ def test_matmulnbits(self): @@
             ]
             self._check_shapes(graph, inferred.graph, expected_shapes)
+        def test_qlinear_binary(self):
+            """
+            Test ONNX QLinearAdd op ('com.microsoft' domain). .
+            Check that the output shape is propagated from the inputs to the op with broadcasting.
+            """
+            initializers = [
+                helper.make_tensor(
+                    "A_scale",
+                    TensorProto.FLOAT,
+                    [],
+                    [0.7],
+                ),
+                helper.make_tensor(
+                    "A_zero_point",
+                    TensorProto.UINT8,
+                    [],
+                    [158],
+                ),
+                helper.make_tensor(
+                    "B_scale",
+                    TensorProto.FLOAT,
+                    [],
+                    [0.02],
+                ),
+                helper.make_tensor(
+                    "B_zero_point",
+                    TensorProto.UINT8,
+                    [],
+                    [5],
+                ),
+                helper.make_tensor(
+                    "C_scale",
+                    TensorProto.FLOAT,
+                    [],
+                    [0.26],
+                ),
+                helper.make_tensor(
+                    "C_zero_point",
+                    TensorProto.UINT8,
+                    [],
+                    [0],
+                ),
+            ]
+            nodes = [
+                helper.make_node(
+                    "QLinearAdd",
+                    inputs=[
+                        "A",
+                        "A_scale",
+                        "A_zero_point",
+                        "B",
+                        "B_scale",
+                        "B_zero_point",
+                        "C_scale",
+                        "C_zero_point",
+                    ],
+                    outputs=["C"],
+                    domain="com.microsoft",
+                ),
+            ]
+            inputs = [
+                helper.make_tensor_value_info("A", TensorProto.UINT8, ["b", 4, 128]),
+                helper.make_tensor_value_info("B", TensorProto.UINT8, ["b", 1, 4, 1, 128]),
+            ]
+            outputs = [
+                helper.make_tensor_value_info("C", TensorProto.UNDEFINED, None),
+            ]
+            graph = helper.make_graph(nodes, "QLinearAdd_Test", inputs, outputs, initializers)
+            model = helper.make_model(graph)
+            inferred = SymbolicShapeInference.infer_shapes(model, auto_merge=True)
+            expected_shapes = [
+                helper.make_tensor_value_info("C", TensorProto.UINT8, ["b", 1, 4, 4, 128]),
+            ]
+            self._check_shapes(graph, inferred.graph, expected_shapes)
     class TestSymbolicShapeInferenceForSlice(unittest.TestCase):
         def check_slice_of_concat(self, input_dims, start, end, step, expected_output_dim):
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[Shape Inference] Add shape inference for QLinearAdd and QLinearMul ops #24090

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!