Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions onnxruntime/python/tools/quantization/base_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def __init__(
# the symmetry (i.e., signed integer types will use symmetric quantization). See `def is_weight_symmetric()`
self._is_weight_symmetric: bool | None = self.extra_options.get("WeightSymmetric", None)
self.is_activation_symmetric = self.extra_options.get("ActivationSymmetric", False)
self.is_activation_restricted_asymmetric = self.extra_options.get("ActivationRestrictedAsymmetric", False)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion (non-blocking): if a user sets both `ActivationSymmetric=True` and `ActivationRestrictedAsymmetric=True`, the restricted path silently does nothing, because `symmetric=True` fails the `not symmetric` guard in both quantizers. This is almost certainly a misconfiguration. Consider logging a warning here when both flags are enabled:

if self.is_activation_symmetric and self.is_activation_restricted_asymmetric:
    logger.warning("ActivationSymmetric and ActivationRestrictedAsymmetric are mutually exclusive; "
                   "ActivationRestrictedAsymmetric will be ignored.")

self.min_real_range = self.extra_options.get("MinimumRealRange")

self.activation_qType = getattr(activation_qType, "tensor_type", activation_qType)
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/python/tools/quantization/onnx_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
ms_domain,
quantize_onnx_initializer,
save_and_reload_model_with_shape_infer,
snap_zero_point_to_uint8,
tensor_proto_to_array,
)
from .registry import CreateOpQuantizer
Expand Down Expand Up @@ -1157,6 +1158,8 @@ def calculate_quantization_params(self):
reduce_range = quant_overrides.get("reduce_range", False)
qmin, qmax = get_qmin_qmax_for_qType(quant_type, reduce_range=reduce_range, symmetric=symmetric)
zero, scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric, self.min_real_range)
if self.is_activation_restricted_asymmetric and quant_type == onnx.TensorProto.UINT8 and not symmetric:
zero, scale = snap_zero_point_to_uint8(rmin, rmax)
Comment thread
tianleiwu marked this conversation as resolved.
Outdated

quantization_params[tensor_name] = QuantizationParams(zero_point=zero, scale=scale, quant_type=quant_type)

Expand Down
7 changes: 7 additions & 0 deletions onnxruntime/python/tools/quantization/qdq_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
ms_domain,
normalize_axis,
quantize_onnx_initializer,
snap_zero_point_to_uint8,
tensor_proto_to_array,
)
from .registry import CreateQDQQuantizer
Expand Down Expand Up @@ -1320,6 +1321,12 @@ def calc_quant_params(self, tensor_data: TensorData, quant_overrides: dict[str,
reduce_range = quant_overrides.get("reduce_range", False)
qmin, qmax = get_qmin_qmax_for_qType(quant_type, reduce_range=reduce_range, symmetric=symmetric)
zero, scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric, self.min_real_range)
if (
self.is_activation_restricted_asymmetric
and quant_type == onnx.TensorProto.UINT8
and not symmetric
):
zero, scale = snap_zero_point_to_uint8(rmin, rmax)

Comment thread
tianleiwu marked this conversation as resolved.
Outdated
return QuantizationParams(zero_point=zero.squeeze(), scale=scale.squeeze(), quant_type=quant_type)

Expand Down
27 changes: 27 additions & 0 deletions onnxruntime/python/tools/quantization/quant_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,33 @@ def compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False, min_real_range=Non
return [zero_point, scale]


def snap_zero_point_to_uint8(rmin, rmax):
    """Snap a uint8 activation zero-point to 0 (when rmin >= 0) or 128 (when rmin < 0).

    Used by the ActivationRestrictedAsymmetric quantization option. The scale is recomputed so
    that the dequantized range covers [rmin, rmax] (widened to include 0.0) without clipping.

    A constant non-zero range (rmin == rmax != 0) is handled like any other range: the zero-point
    is still chosen from the sign of rmin and the scale is sized so the constant is representable.
    The neutral pair (zero_point=0, scale=1.0) is returned only when the range is inverted
    (rmax < rmin) or when the resulting scale would be smaller than the smallest normal float32
    (for example rmin == rmax == 0).

    :parameter rmin: calibrated minimum activation value (numpy scalar)
    :parameter rmax: calibrated maximum activation value (numpy scalar)
    :return: (zero_point, scale) with zero_point dtype uint8 and scale dtype float32
    """
    rmin = float(numpy.squeeze(rmin))
    rmax = float(numpy.squeeze(rmax))

    if rmax < rmin:
        # Inverted range - nothing meaningful to cover, return neutral values.
        return numpy.array(0, dtype=numpy.uint8), numpy.array(1.0, dtype=numpy.float32)

    # Widen the range so real 0.0 always lies inside it. For rmin < rmax this yields the same
    # scale as using rmin/rmax directly; it only matters for constant ranges such as [-5, -5]
    # or [300, 300], which would otherwise be clipped by a neutral (0, 1.0) result.
    lowest = min(rmin, 0.0)
    highest = max(rmax, 0.0)

    if rmin >= 0.0:
        zero_point = numpy.array(0, dtype=numpy.uint8)
        # q spans [0, 255] and maps to [0, highest].
        scale = numpy.array(highest / 255.0, dtype=numpy.float32)
    else:
        zero_point = numpy.array(128, dtype=numpy.uint8)
        # Choose the scale that covers both the negative and positive halves without clipping.
        scale_neg = -lowest / 128.0  # scale needed to represent rmin at q=0
        scale_pos = highest / 127.0  # scale needed to represent rmax at q=255
        scale = numpy.array(max(scale_neg, scale_pos), dtype=numpy.float32)

    if scale < numpy.finfo(numpy.float32).tiny:
        # The range collapsed to (effectively) zero; a zero/subnormal scale is unusable.
        return numpy.array(0, dtype=numpy.uint8), numpy.array(1.0, dtype=numpy.float32)

    return zero_point, scale


def compute_scale_zp_float8(element_type, std):
"""Calculate the scale s for a float8 type (E4M3FN).
The function assumes the coefficient distribution and the float 8
Expand Down
8 changes: 8 additions & 0 deletions onnxruntime/python/tools/quantization/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ def __init__(
key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
ActivationSymmetric = True/False: symmetrize calibration data for activations (default is False).
ActivationRestrictedAsymmetric = True/False: (uint8 activations only) snap zero-point to 0
Comment thread
tianleiwu marked this conversation as resolved.
Outdated
(rmin>=0) or 128 (rmin<0); recompute scale accordingly (default is False).
Comment thread
tianleiwu marked this conversation as resolved.
Outdated
WeightSymmetric = True/False: symmetrize calibration data for weights (default is True).
EnableSubgraph = True/False : Default is False. If enabled, subgraph will be quantized.
Dynamic mode currently is supported. Will support more in future.
Expand Down Expand Up @@ -419,6 +421,8 @@ def __init__(
extra_options: key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
ActivationSymmetric = True/False: symmetrize calibration data for activations (default is False).
ActivationRestrictedAsymmetric = True/False: (uint8 activations only) snap zero-point to 0
(rmin>=0) or 128 (rmin<0); recompute scale accordingly (default is False).
WeightSymmetric = True/False: symmetrize calibration data for weights (default is True).
EnableSubgraph = True/False :
Default is False. If enabled, subgraph will be quantized. Dynamic mode currently is supported. Will
Expand Down Expand Up @@ -544,6 +548,8 @@ def quantize_static(
key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
ActivationSymmetric = True/False: symmetrize calibration data for activations (default is False).
ActivationRestrictedAsymmetric = True/False: (uint8 activations only) snap zero-point to 0
(rmin>=0) or 128 (rmin<0); recompute scale accordingly (default is False).
WeightSymmetric = True/False: symmetrize calibration data for weights (default is True).
EnableSubgraph = True/False : Default is False. If enabled, subgraph will be quantized.
Dynamic mode currently is supported. Will support more in the future.
Expand Down Expand Up @@ -834,6 +840,8 @@ def quantize_dynamic(
key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
ActivationSymmetric = True/False: symmetrize calibration data for activations (default is False).
ActivationRestrictedAsymmetric = True/False: (uint8 activations only) snap zero-point to 0
(rmin>=0) or 128 (rmin<0); recompute scale accordingly (default is False).
WeightSymmetric = True/False: symmetrize calibration data for weights (default is True).
EnableSubgraph = True/False :
Default is False. If enabled, subgraph will be quantized. Dynamic mode currently is supported. Will
Expand Down
81 changes: 81 additions & 0 deletions onnxruntime/test/python/quantization/test_symmetric_flag.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,84 @@ def test_3(self):

class TestRestrictedAsymmetricFlag(unittest.TestCase):
    """Tests for ActivationRestrictedAsymmetric extra-option (uint8 zero-point snapping)."""

    def setUp(self):
        # All-positive activations (post-ReLU-like): rmin >= 0, expect zp == 0
        self.positive_activations = [
            np.zeros([1, 2, 32, 32], dtype="float32"),
            np.ones([1, 2, 32, 32], dtype="float32") * 2.0,
        ]
        # Signed-range activations: rmin < 0, expect zp == 128
        self.signed_activations = [
            -1.0 * np.ones([1, 2, 32, 32], dtype="float32"),
            +2.0 * np.ones([1, 2, 32, 32], dtype="float32"),
        ]

        self.weights = np.concatenate(
            (
                -1 * np.ones([1, 1, 2, 2], dtype="float32"),
                +1 * np.ones([1, 1, 2, 2], dtype="float32"),
            ),
            axis=1,
        )

    def _quantize(self, activations, extra_options):
        """Build a single-Conv model, statically quantize it, and return the ACT quantization params.

        :parameter activations: list of float32 arrays fed to input "ACT" during calibration
        :parameter extra_options: dict forwarded to quantize_static as `extra_options`
        :return: (zero_point, scale) read from the "ACT_zero_point" / "ACT_scale" initializers
        """
        # Imported locally so this helper does not rely on the module's import block.
        import os
        import tempfile

        act = helper.make_tensor_value_info("ACT", TensorProto.FLOAT, activations[0].shape)
        res = helper.make_tensor_value_info("RES", TensorProto.FLOAT, [None, None, None, None])
        wgt_init = numpy_helper.from_array(self.weights, "WGT")
        conv_node = onnx.helper.make_node("Conv", ["ACT", "WGT"], ["RES"])
        graph = helper.make_graph([conv_node], "test", [act], [res], initializer=[wgt_init])
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)])

        class DummyDataReader(quantization.CalibrationDataReader):
            def __init__(self_inner):
                self_inner.iterator = ({"ACT": act} for act in activations)

            def get_next(self_inner):
                return next(self_inner.iterator, None)

        # Keep the model files in a throw-away directory so test runs do not leave
        # artifacts in (or collide through) the current working directory.
        with tempfile.TemporaryDirectory(prefix="ort.quant.restricted_") as tmp_dir:
            model_path = os.path.join(tmp_dir, "model_restricted.onnx")
            quantized_path = os.path.join(tmp_dir, "quantized_restricted.onnx")
            onnx.save(model, model_path)

            quantization.quantize_static(
                model_input=model_path,
                model_output=quantized_path,
                calibration_data_reader=DummyDataReader(),
                quant_format=quantization.QuantFormat.QOperator,
                activation_type=quantization.QuantType.QUInt8,
                weight_type=quantization.QuantType.QUInt8,
                op_types_to_quantize=["Conv", "MatMul"],
                extra_options=extra_options,
            )

            quantized_model = onnx.load(quantized_path)

        initializers = {init.name: init for init in quantized_model.graph.initializer}
        act_zp = initializers["ACT_zero_point"].int32_data[0]
        act_sc = initializers["ACT_scale"].float_data[0]
        return act_zp, act_sc

    def test_positive_activations_zp_is_zero(self):
        """All-positive range (rmin >= 0): zero-point must snap to 0."""
        act_zp, _ = self._quantize(
            self.positive_activations,
            extra_options={"ActivationRestrictedAsymmetric": True},
        )
        self.assertEqual(act_zp, 0, f"Expected zp=0 for rmin>=0, got {act_zp}")

    def test_signed_activations_zp_is_128(self):
        """Signed range (rmin < 0): zero-point must snap to 128."""
        act_zp, _ = self._quantize(
            self.signed_activations,
            extra_options={"ActivationRestrictedAsymmetric": True},
        )
        self.assertEqual(act_zp, 128, f"Expected zp=128 for rmin<0, got {act_zp}")

    def test_option_false_does_not_snap(self):
        """When ActivationRestrictedAsymmetric is False, behavior matches standard asymmetric (zp != 128 for signed)."""
        act_zp, _ = self._quantize(
            self.signed_activations,
            extra_options={"ActivationRestrictedAsymmetric": False},
        )
        # Standard asymmetric uint8 with rmin=-1, rmax=2 should give non-128 zp (it's ~85)
        self.assertNotEqual(act_zp, 128, f"Option=False should not snap to 128, got {act_zp}")


# The entry point must come after every TestCase in the module: unittest.main() runs
# (and exits) immediately, so classes defined below it are never collected when the
# file is executed directly.
if __name__ == "__main__":
    unittest.main()