Skip to content

Commit 8ff1bf1

Browse files
committed
Add 16A8W support for view and transpose operations
Pull Request resolved: #13799 Add 16A8W quantization support for view and transpose operations in ExecutorTorch ARM backend. This follows the pattern established for linear, mul, sigmoid, tanh, and slice operations, extending int16 support to view and transpose operations. Changes: - Add INT16 dtype validation support in op_transpose.py - Add test_view_16a8w_tosa_INT test function - Enable test_view.py in test targets configuration The 16A8W configuration uses 16-bit activations with 8-bit weights, enabling higher precision for activations while maintaining weight efficiency. ghstack-source-id: 308986670 @exported-using-ghexport Differential Revision: [D80511313](https://our.internmc.facebook.com/intern/diff/D80511313/)
1 parent 92ce104 commit 8ff1bf1

File tree

2 files changed

+114
-1
lines changed

2 files changed

+114
-1
lines changed

backends/arm/test/ops/test_view.py

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,14 @@
99

1010
from typing import Tuple
1111

12+
import pytest
1213
import torch
14+
from executorch.backends.arm.quantizer.arm_quantizer import (
15+
get_symmetric_a16w8_quantization_config,
16+
TOSAQuantizer,
17+
)
1318

14-
from executorch.backends.arm.test import common
19+
from executorch.backends.arm.test import common, conftest
1520
from executorch.backends.arm.test.tester.test_pipeline import (
1621
EthosU55PipelineINT,
1722
EthosU85PipelineINT,
@@ -20,6 +25,8 @@
2025
TosaPipelineINT,
2126
VgfPipeline,
2227
)
28+
from executorch.backends.arm.tosa.specification import TosaSpecification
29+
from executorch.backends.xnnpack.test.tester import Quantize
2330

2431
aten_op = "torch.ops.aten.view.default"
2532

@@ -147,3 +154,108 @@ def test_view_u85_INT(test_data: Tuple):
147154
exir_ops=[],
148155
)
149156
pipeline.run()
157+
158+
159+
def get_symmetric_a16w8_view_quantizer(per_channel_quantization=False):
    """Build a Quantize stage configured for 16A8W symmetric quantization.

    16A8W = 16-bit activations with 8-bit weights. The TOSA profile is
    selected from the test session's ``tosa_version`` option; only the
    1.0 profile (with the int16 extension) is supported here.

    Args:
        per_channel_quantization: If True, quantize weights per channel.

    Returns:
        A ``Quantize`` stage wrapping a ``TOSAQuantizer`` whose global
        config is the symmetric a16w8 configuration.
    """
    tosa_version = conftest.get_option("tosa_version")
    tosa_profiles = {
        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
    }

    # Build the a16w8 config once and reuse it for both the quantizer's
    # global config and the Quantize stage (previously constructed twice
    # with identical arguments).
    quantization_config = get_symmetric_a16w8_quantization_config(
        is_per_channel=per_channel_quantization
    )

    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
    quantizer.set_global(quantization_config)

    return Quantize(quantizer, quantization_config)
176+
177+
178+
@common.parametrize("test_data", View.needs_transpose_tests)
@pytest.mark.xfail(
    reason="missing int16 view ops support; fails at TOSA reference model with Unsupported operation type or rank. See: https://github.com/pytorch/executorch/issues/13977"
)
def test_view_16a8w_tosa_INT(test_data: Tuple):
    """Run the TOSA INT pipeline for view with 16A8W quantization
    (16-bit activations, 8-bit weights)."""
    per_channel_quantization = False
    tensor_in, target_shape = test_data()

    pipeline = TosaPipelineINT[input_t1](
        View(target_shape),
        (tensor_in,),
        aten_op,
        exir_op=[],
        per_channel_quantization=per_channel_quantization,
        use_to_edge_transform_and_lower=True,
        tosa_extensions=["int16"],
    )

    # Swap the default quantize stage for the a16w8 quantizer.
    quantize_stage = get_symmetric_a16w8_view_quantizer(
        per_channel_quantization=per_channel_quantization
    )
    pipeline.change_args("quantize", quantize_stage)
    pipeline.run()
204+
205+
206+
@common.parametrize("test_data", View.needs_transpose_tests)
@common.XfailIfNoCorstone300
@pytest.mark.xfail(
    reason="Vela compilation fails with 'Invalid arguments' for int16 view operations"
)
def test_view_16a8w_u55_INT16(test_data: Tuple):
    """Run the Ethos-U55 INT pipeline for view with 16A8W quantization
    (16-bit activations, 8-bit weights)."""
    per_channel_quantization = False
    tensor_in, target_shape = test_data()

    pipeline = EthosU55PipelineINT[input_t1](
        View(target_shape),
        (tensor_in,),
        aten_op,
        exir_ops=[],
        per_channel_quantization=per_channel_quantization,
        use_to_edge_transform_and_lower=True,
        run_on_fvp=True,
    )

    # Swap the default quantize stage for the a16w8 quantizer.
    quantize_stage = get_symmetric_a16w8_view_quantizer(
        per_channel_quantization=per_channel_quantization
    )
    pipeline.change_args("quantize", quantize_stage)
    pipeline.run()
233+
234+
235+
@common.parametrize("test_data", View.needs_transpose_tests)
@common.XfailIfNoCorstone320
@pytest.mark.xfail(
    reason="Vela compilation fails with 'Invalid arguments' for int16 view operations"
)
def test_view_16a8w_u85_INT16(test_data: Tuple):
    """Run the Ethos-U85 INT pipeline for view with 16A8W quantization
    (16-bit activations, 8-bit weights)."""
    per_channel_quantization = False
    tensor_in, target_shape = test_data()

    pipeline = EthosU85PipelineINT[input_t1](
        View(target_shape),
        (tensor_in,),
        aten_op,
        exir_ops=[],
        per_channel_quantization=per_channel_quantization,
        use_to_edge_transform_and_lower=True,
        run_on_fvp=True,
    )

    # Swap the default quantize stage for the a16w8 quantizer.
    quantize_stage = get_symmetric_a16w8_view_quantizer(
        per_channel_quantization=per_channel_quantization
    )
    pipeline.change_args("quantize", quantize_stage)
    pipeline.run()

backends/arm/test/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def define_arm_tests():
2020
"ops/test_slice.py",
2121
"ops/test_sigmoid.py",
2222
"ops/test_tanh.py",
23+
"ops/test_view.py",
2324
"ops/test_cos.py",
2425
]
2526

0 commit comments

Comments
 (0)