@@ -25,6 +25,7 @@
     MXInferenceLinear,
     MXLinear,
 )
+from torchao.prototype.mx_formats.mx_subclass import MXFPInferenceConfig
 from torchao.quantization import quantize_
 from torchao.quantization.utils import compute_error
 from torchao.utils import (
@@ -372,3 +373,34 @@ def test_inference_print_str():
     s = str(m)
     assert "bl_sz=32" in s
     assert "kernel=emulated" in s
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+@pytest.mark.skipif(
+    not TORCH_VERSION_AT_LEAST_2_8, reason="torch.compile requires PyTorch 2.8+"
+)
+@pytest.mark.skipif(not is_sm_at_least_100(), reason="Requires sm100")
+@pytest.mark.parametrize("elem_dtype", [torch.float8_e4m3fn])
+@pytest.mark.parametrize("bias", [True, False])
+@pytest.mark.parametrize("compile", [True, False])
+@torch.no_grad()
+def test_inference_subclass(elem_dtype, bias: bool, compile: bool):
+    """
+    Smoke test for MXFP inference, with and without torch.compile.
+    """
+    if elem_dtype in (torch.float8_e4m3fn, torch.float8_e5m2):
+        if not is_sm_at_least_89():
+            pytest.skip("CUDA capability >= 8.9 required for float8 in triton")
+
+    m = nn.Linear(32, 128, bias=bias, dtype=torch.bfloat16, device="cuda")
+    m_mx = copy.deepcopy(m)
+    config = MXFPInferenceConfig()
+    quantize_(m_mx, config=config)
+    if compile:
+        m_mx = torch.compile(m_mx, fullgraph=True)
+
+    x = torch.randn(128, 32, device="cuda", dtype=torch.bfloat16)
+    y_ref = m(x)
+    y_mx = m_mx(x)
+    sqnr = compute_error(y_ref, y_mx)
+    assert sqnr >= 25.0, f"Got a sqnr of {sqnr} for {elem_dtype} and bias={bias}"
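
The pass criterion is a signal-to-quantization-noise ratio (SQNR) of at least 25 dB between the bfloat16 reference output and the MXFP output. For reference, a minimal sketch of the metric, assuming torchao's compute_error uses the standard dB definition of SQNR:

import torch

def sqnr_db(ref: torch.Tensor, quantized: torch.Tensor) -> torch.Tensor:
    # SQNR in dB: norm of the reference signal over the norm of the
    # quantization error; each +20 dB means a 10x smaller error norm.
    signal = torch.linalg.norm(ref.float())
    noise = torch.linalg.norm(ref.float() - quantized.float())
    return 20 * torch.log10(signal / noise)

Under that definition, the 25.0 dB threshold permits a relative error norm of about 5.6% (10^(-25/20)), loose enough to absorb fp8 rounding while still catching a broken kernel.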
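
Outside the test harness, the same flow applies to any module containing linear layers. A minimal inference sketch, assuming the prototype import path from the diff and quantize_'s default behavior of swapping nn.Linear children; the model and shapes are illustrative:

import torch
import torch.nn as nn

from torchao.prototype.mx_formats.mx_subclass import MXFPInferenceConfig
from torchao.quantization import quantize_

# Illustrative bfloat16 model on CUDA; the fast MXFP path targets sm100.
model = nn.Sequential(
    nn.Linear(32, 128),
    nn.ReLU(),
    nn.Linear(128, 32),
).to(device="cuda", dtype=torch.bfloat16)

# Swap the linear layers to MXFP in place, then compile for inference.
quantize_(model, config=MXFPInferenceConfig())
model = torch.compile(model, fullgraph=True)

with torch.no_grad():
    y = model(torch.randn(16, 32, device="cuda", dtype=torch.bfloat16))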