Update int8 quant API: default dequantize() output_dtype to self.dtype and remove the aten.dequantize.self override

namgyu-youn · namgyu-youn · commit 49a7a897220b · 2025-10-23T21:58:46.000+09:00
diff --git a/torchao/quantization/quantize_/workflows/int8/int8_tensor.py b/torchao/quantization/quantize_/workflows/int8/int8_tensor.py
@@ -141,6 +141,9 @@ def from_hp(
     def dequantize(self, output_dtype: Optional[torch.dtype] = None) -> torch.Tensor:
         """Dequantize int8 tensor to floating point"""
 
+        if output_dtype is None:
+            output_dtype = self.dtype
+
         qdata_fp = self.qdata.to(output_dtype)
         # Reshape scale to broadcast if granularity is block-wise
         scale_expanded = _maybe_expand_scale_to_tensor_shape(
@@ -153,12 +156,6 @@ def dequantize(self, output_dtype: Optional[torch.dtype] = None) -> torch.Tensor
 implements_torch_function = Int8Tensor.implements_torch_function
 
 
-@implements([aten.dequantize.self])
-def _(func, types, args, kwargs):
-    """dequantization: int8 -> float"""
-    return args[0].dequantize()
-
-
 @implements(aten.linear.default)
 @implements_torch_function(torch.nn.functional.linear)
 def _(func, types, args, kwargs):