Add decorator for custom op and inductor decomp registration
Summary:
This PR adds a decorator that registers a function both as a custom op and as an inductor decomposition.

The goal is for the torch.export path to be able to see high-level ops like quantize_affine instead of their decompositions, because some backends, such as XNNPACK, want to work with these higher-level ops.

This is a redo of #408; the difference is that in this PR we can preserve the enums on the Python side.
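
As an illustration, a minimal sketch of what this enables (hedged: the module and shapes are made up, and it assumes torch 2.5+ where the custom ops are registered):

import torch
from torchao.quantization.quant_primitives import quantize_affine

class M(torch.nn.Module):
    def forward(self, x):
        scale = torch.tensor(0.1)
        zero_point = torch.tensor(0)
        # one block spanning the whole tensor -> per-tensor quantization
        return quantize_affine(x, list(x.shape), scale, zero_point, torch.int8)

m = torch.export.export(M(), (torch.randn(2, 4),)).module()
# the exported graph should now contain the high-level
# torch.ops.quant.quantize_affine call rather than its decomposition
print(m)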

Test Plan:
regression tests:
python test/quantization/test_quant_api.py
python test/integration/test_integration.py

Also check performance with: python tutorials/quantize_vit/run_vit_b_quant.py

Reviewers:

Subscribers:

Tasks:

Tags:
jerryzh168 committed Jun 25, 2024
1 parent c2cf973 commit 5186214
Showing 2 changed files with 123 additions and 21 deletions.
7 changes: 4 additions & 3 deletions test/integration/test_integration.py
@@ -1244,7 +1244,7 @@ def test_autoquant_manual(self, device, dtype):
out3 = mod(example_input)
sqnr2 = SQNR(out, out3)
self.assertTrue(sqnr2 >= 30)


@parameterized.expand(combine_parameters(COMMON_DEVICE_DTYPE,
[
@@ -1375,8 +1375,8 @@ class TestExport(unittest.TestCase):
@parameterized.expand(
list(itertools.product(TENSOR_SUBCLASS_APIS, COMMON_DEVICES, COMMON_DTYPES)),
)
-@run_supported_device_dtype
-def test_aoti(self, api, test_device, test_dtype):
+# @run_supported_device_dtype
+def test_export(self, api, test_device, test_dtype):
if not TORCH_VERSION_AFTER_2_4:
self.skipTest("aoti compatibility requires 2.4+.")

@@ -1416,6 +1416,7 @@ def forward(self, x):
model = torch.export.export(model, example_inputs).module()
after_export = model(x)
self.assertTrue(torch.equal(after_export, ref))
print("model:", model)

class TestUtils(unittest.TestCase):
@parameterized.expand(COMMON_DEVICE_DTYPE)
137 changes: 119 additions & 18 deletions torchao/quantization/quant_primitives.py
@@ -4,13 +4,16 @@
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

-from enum import Enum
+from enum import Enum, auto
from typing import List, Optional, Tuple, Dict
import torch

from torchao.kernel.intmm import int_scaled_matmul
from torchao.kernel.intmm import safe_int_mm
-from torchao.utils import TORCH_VERSION_AFTER_2_3
+from torchao.utils import (
+TORCH_VERSION_AFTER_2_3,
+TORCH_VERSION_AFTER_2_5,
+)


__all__ = [
@@ -34,17 +37,17 @@ class MappingType(Enum):
based on this mapping
e.g. scale = (10.2 - (-3.5)) / (7 - (-8))
"""
-SYMMETRIC = 0
-ASYMMETRIC = 1
+SYMMETRIC = auto()
+ASYMMETRIC = auto()

class ZeroPointDomain(Enum):
"""Enum that indicate whether zero_point is in integer domain or floating point domain
integer domain: quantized_val = (float_val / scale) (integer) + zero_point (integer)
float domain: quantized_val = (float_val - (zero_point (float) - scale * mid_point)) / scale
"""
-INT = 0
-FLOAT = 1
+INT = auto()
+FLOAT = auto()

"""
Map from dtype to the bound value of integers
@@ -69,6 +72,20 @@ class ZeroPointDomain(Enum):
})


def register_custom_op(name: str):
from torch._inductor.decomposition import register_decomposition

def decorator(fn):
if TORCH_VERSION_AFTER_2_5:
opdef = torch.library.custom_op(name, mutates_args=())(fn)
opdef.register_fake(fn)
register_decomposition([opdef._opoverload])(fn)
return opdef
else:
return fn

return decorator
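
# A minimal usage sketch for the decorator above (hypothetical op name
# "quant::my_op", not from this commit):
#
#     @register_custom_op("quant::my_op")
#     def my_op(x: torch.Tensor) -> torch.Tensor:
#         return x + 1
#
# On torch 2.5+ this registers torch.ops.quant.my_op, reusing the Python body
# both as the fake (meta) implementation and as an inductor decomposition;
# on older versions the undecorated function is returned unchanged.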

# TODO: decide on if we want to allow custom quant_min/quant_max here
def _get_and_check_qmin_qmax(dtype, quant_min, quant_max):
"""Get quant_min and quant_max args based on dtype and also
@@ -140,7 +157,7 @@ def quantize_affine(
quant_min: Optional[int] = None,
quant_max: Optional[int] = None,
zero_point_domain: ZeroPointDomain = ZeroPointDomain.INT,
-):
+) -> torch.Tensor:
"""
Args:
input (torch.Tensor): original float32, float16 or bfloat16 Tensor
@@ -174,6 +191,31 @@ def quantize_affine(
Output:
quantized tensor with requested dtype
"""
return _quantize_affine(
input,
block_size,
scale,
zero_point,
output_dtype,
quant_min,
quant_max,
zero_point_domain.name,
)
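
# Note: the public quantize_affine keeps the ZeroPointDomain enum in its
# signature and passes zero_point_domain.name to the private op below, because
# the torch.library custom-op schema accepts only basic types (Tensor, int,
# float, bool, str, torch.dtype, ...) and not Python enums.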


@register_custom_op("quant::quantize_affine")
def _quantize_affine(
input: torch.Tensor,
block_size: List[int],
scale: torch.Tensor,
zero_point: Optional[torch.Tensor],
output_dtype: torch.dtype,
quant_min: Optional[int] = None,
quant_max: Optional[int] = None,
zero_point_domain: str = "INT",
) -> torch.Tensor:
"""op definition that has compatible signatures with custom op library
"""
# TODO: validations
# TODO: validate scale/zero_point dimensions are compatible with block_size
assert input.dtype in [torch.float32, torch.float16, torch.bfloat16], f"Unsupported input dtype: {input.dtype}"
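# block_size gives the quantization granularity: the shape of the block of
# elements that share one scale/zero_point (block_size equal to input.shape
# means per-tensor quantization).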
@@ -188,12 +230,12 @@ def quantize_affine(
if zero_point is not None:
zero_point = zero_point.view(shape_after_reduction)

-if zero_point_domain == ZeroPointDomain.INT:
+if zero_point_domain == ZeroPointDomain.INT.name:
quant = torch.clamp(
torch.round(input * (1.0 / scale)) + zero_point, quant_min, quant_max
).to(output_dtype)
else:
-assert zero_point_domain == ZeroPointDomain.FLOAT
+assert zero_point_domain == ZeroPointDomain.FLOAT.name
mid_point = (quant_max + quant_min + 1) / 2
min_val = zero_point - scale * mid_point
quant = (
@@ -216,7 +258,7 @@ def dequantize_affine(
zero_point_domain: ZeroPointDomain = ZeroPointDomain.INT,
*,
output_dtype: torch.dtype = torch.float32,
-):
+) -> torch.Tensor:
"""
Args:
input (torch.Tensor): quantized tensor, should match the dtype `dtype` argument
@@ -238,6 +280,34 @@
Output:
dequantized Tensor, with requested dtype or fp32
"""
return _dequantize_affine(
input,
block_size,
scale,
zero_point,
input_dtype,
quant_min,
quant_max,
zero_point_domain.name,
output_dtype=output_dtype,
)


@register_custom_op("quant::dequantize_affine")
def _dequantize_affine(
input: torch.Tensor,
block_size: List[int],
scale: torch.Tensor,
zero_point: Optional[torch.Tensor],
input_dtype: torch.dtype,
quant_min: Optional[int] = None,
quant_max: Optional[int] = None,
zero_point_domain: str = "INT",
*,
output_dtype: torch.dtype = torch.float32,
) -> torch.Tensor:
"""op definition that has compatible signatures with custom op library
"""

# TODO: validations
# TODO: validate scale/zero_point dimensions are compatible with block_size
@@ -255,16 +325,16 @@ def dequantize_affine(
if zero_point is not None:
zero_point = zero_point.view(shape_after_reduction)

-if zero_point_domain == ZeroPointDomain.INT:
+if zero_point_domain == ZeroPointDomain.INT.name:
# Force a copy to avoid input modification due
# to upcoming in-place operations.
dequant = input.to(torch.int32, copy=True)
if zero_point is not None:
-dequant -= zero_point.to(torch.int32)
+dequant = dequant - zero_point.to(torch.int32)
dequant = dequant.to(output_dtype)
-dequant *= scale
+dequant = dequant * scale
else:
-assert zero_point_domain == ZeroPointDomain.FLOAT, f"Unexpected zero point domain: {zero_point_domain}"
+assert zero_point_domain == ZeroPointDomain.FLOAT.name, f"Unexpected zero point domain: {zero_point_domain}"
mid_point = (quant_max + quant_min + 1) / 2
# This should allocate new memory and avoid input modification
dequant = input - mid_point
@@ -320,8 +390,38 @@ def choose_qparams_affine(
Output:
Tuple of scales and zero_points Tensor with requested dtype
"""
return _choose_qparams_affine(
input,
mapping_type.name,
block_size,
target_dtype,
quant_min,
quant_max,
eps,
scale_dtype,
zero_point_dtype,
preserve_zero,
zero_point_domain.name
)

@register_custom_op("quant::choose_qparams_affine")
def _choose_qparams_affine(
input: torch.Tensor,
mapping_type: str,
block_size: List[int],
target_dtype: torch.dtype,
quant_min: Optional[int] = None,
quant_max: Optional[int] = None,
eps: Optional[float] = None,
scale_dtype: Optional[torch.dtype] = None,
zero_point_dtype: Optional[torch.dtype] = None,
preserve_zero: bool = True,
zero_point_domain: str = "INT",
) -> Tuple[torch.Tensor, torch.Tensor]:
"""op definition that has compatible signatures with custom op library
"""
quant_min, quant_max = _get_and_check_qmin_qmax(target_dtype, quant_min, quant_max)
-assert mapping_type in [MappingType.SYMMETRIC, MappingType.ASYMMETRIC], f"Unsupported mapping type: {mapping_type}"
+assert mapping_type in [MappingType.SYMMETRIC.name, MappingType.ASYMMETRIC.name], f"Unsupported mapping type: {mapping_type}"

if scale_dtype is None:
scale_dtype = input.dtype
@@ -342,21 +442,22 @@ def choose_qparams_affine(
min_val_neg = min_val
max_val_pos = max_val

-if mapping_type == MappingType.SYMMETRIC:
+if mapping_type == MappingType.SYMMETRIC.name:
max_val_pos = torch.max(-min_val_neg, max_val_pos)
scale = max_val_pos / (float(quant_max - quant_min) / 2)
if not preserve_zero:
raise ValueError("preserve_zero == False is not supported for symmetric quantization")
-if zero_point_domain != ZeroPointDomain.INT:
+if zero_point_domain != ZeroPointDomain.INT.name:
raise ValueError("zero_point_domain != ZeroPointDomain.INT is not supported for symmetric quantization")
zero_point = torch.full_like(scale, int((quant_max + quant_min + 1) / 2))
else:
+assert mapping_type == MappingType.ASYMMETRIC.name
scale = (max_val_pos - min_val_neg) / float(quant_max - quant_min)
if preserve_zero:
zero_point = quant_min - torch.round(min_val_neg / scale)
zero_point = torch.clamp(zero_point, quant_min, quant_max)
else:
-assert zero_point_domain == ZeroPointDomain.FLOAT, "if not preserve_zero, zero_point must be in FLOAT domain"
+assert zero_point_domain == ZeroPointDomain.FLOAT.name, "if not preserve_zero, zero_point must be in FLOAT domain"
mid_point = (quant_max + quant_min + 1) / 2
zero_point = min_val_neg + scale * mid_point

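For reference, a minimal end-to-end sketch of the three primitives touched above (hedged: shapes and values are made up):

import torch
from torchao.quantization.quant_primitives import (
    MappingType,
    choose_qparams_affine,
    dequantize_affine,
    quantize_affine,
)

x = torch.randn(4, 8)
block_size = list(x.shape)  # one block over the whole tensor -> per-tensor qparams
scale, zero_point = choose_qparams_affine(x, MappingType.ASYMMETRIC, block_size, torch.int8)
q = quantize_affine(x, block_size, scale, zero_point, torch.int8)
dq = dequantize_affine(q, block_size, scale, zero_point, torch.int8)
# round-trip error should be bounded by roughly scale / 2
print((x - dq).abs().max().item(), scale.item())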
