Commit b1e3e28

Refactor cadence.convolution
Differential Revision: D86473542 Pull Request resolved: #15762
1 parent 76d43bc commit b1e3e28

5 files changed: +230 −335 lines changed

backends/cadence/aot/ops_registrations.py

Lines changed: 108 additions & 31 deletions
@@ -351,10 +351,6 @@ def register_fake(
     "quantized_matmul_asym8uxasym8u_asym8u.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False, *, Tensor(a!) out) -> Tensor(a!)"
 )
 
-lib.define(
-    "convolution(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, "
-    "int[] dilation, int groups, bool channel_last=False) -> (Tensor Y)"
-)
 lib.define(
     "transposed_convolution(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, "
     "int[] dilation, SymInt[] output_padding, int groups, bool channel_last=False) -> (Tensor Y)"
@@ -489,8 +485,28 @@ def register_fake(
 # ------------------------------------ #
 # Migrated from the custom_ops.yaml files containing different operator variants (e.g., .out, .tensor_out)
 lib.define(
-    "convolution.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, "
-    "int groups, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)"
+    "conv1d(Tensor input, Tensor weight, Tensor bias, int[1] stride, SymInt[1] padding, int[1] dilation, "
+    "int groups) -> Tensor"
+)
+lib.define(
+    "conv1d.out(Tensor input, Tensor weight, Tensor bias, int[1] stride, SymInt[1] padding, int[1] dilation, "
+    "int groups, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+    "conv2d(Tensor input, Tensor weight, Tensor bias, int[2] stride, SymInt[2] padding, int[2] dilation, "
+    "int groups) -> Tensor"
+)
+lib.define(
+    "conv2d.out(Tensor input, Tensor weight, Tensor bias, int[2] stride, SymInt[2] padding, int[2] dilation, "
+    "int groups, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+    "conv3d(Tensor input, Tensor weight, Tensor bias, int[3] stride, SymInt[3] padding, int[3] dilation, "
+    "int groups) -> Tensor"
+)
+lib.define(
+    "conv3d.out(Tensor input, Tensor weight, Tensor bias, int[3] stride, SymInt[3] padding, int[3] dilation, "
+    "int groups, *, Tensor(a!) out) -> Tensor(a!)"
 )
 lib.define(
     "transposed_convolution.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, "
@@ -2152,41 +2168,102 @@ def quantized_fully_connected_asym8uxasym8u_asym8u_per_tensor_meta(
     return src.new_empty(out_size, dtype=src.dtype)
 
 
-@register_fake("cadence::convolution")
-def convolution_meta(
+@register_fake("cadence::conv1d")
+def conv1d_meta(
     input: torch.Tensor,
     weight: torch.Tensor,
     bias: torch.Tensor,
     stride: Tuple[int],
     padding: Tuple[int],
     dilation: Tuple[int],
     groups: int,
-    channel_last: bool = False,
 ) -> torch.Tensor:
-    if channel_last:
-        out_channels, *kernel_size, _ = weight.shape
-    else:
-        out_channels, _, *kernel_size = weight.shape
+    assert (
+        len(weight.shape) == 3
+    ), f"Conv1d expects a 3D weight, got {len(weight.shape)}D"
+    out_channels, _, kernel_size = weight.shape
     in_size = input.shape
-    # Assert that the input tensor has at least 3 dimensions, and at most 6
-    assert len(in_size) > 2
-    assert len(in_size) < 6
+    assert len(in_size) == 3, f"conv1d expects 3D input, got {len(in_size)}D"
 
-    # Compute the output tensor size
-    output_size = (
-        get_conv1d_output_size(
-            in_size,
-            out_channels,
-            stride[0],
-            padding[0],
-            dilation[0],
-            kernel_size[0],
-            channel_last,
-        )
-        if len(in_size) == 3
-        else get_conv2d_output_size(
-            in_size, out_channels, stride, padding, dilation, kernel_size, channel_last
-        )
+    output_size = get_conv1d_output_size(
+        in_size,
+        out_channels,
+        stride[0],
+        padding[0],
+        dilation[0],
+        kernel_size,
+        False,
+    )
+
+    return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::conv2d")
+def conv2d_meta(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    stride: Tuple[int],
+    padding: Tuple[int],
+    dilation: Tuple[int],
+    groups: int,
+) -> torch.Tensor:
+    assert (
+        len(weight.shape) == 4
+    ), f"Conv2d expects a 4D weight, got {len(weight.shape)}D"
+    out_channels, _, *kernel_size = weight.shape
+    in_size = input.shape
+    assert len(in_size) == 4, f"conv2d expects 4D input, got {len(in_size)}D"
+
+    output_size = get_conv2d_output_size(
+        in_size, out_channels, stride, padding, dilation, kernel_size, False
+    )
+
+    return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::conv3d")
+def conv3d_meta(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    stride: Tuple[int, int, int],
+    padding: Tuple[int, int, int],
+    dilation: Tuple[int, int, int],
+    groups: int,
+) -> torch.Tensor:
+    assert (
+        len(weight.shape) == 5
+    ), f"Conv3d expects a 5D weight, got {len(weight.shape)}D"
+    out_channels, _, *kernel_size = weight.shape
+    in_size = input.shape
+    assert len(in_size) == 5, f"conv3d expects 5D input, got {len(in_size)}D"
+
+    # Helper to compute 3D convolution output size
+    def get_conv3d_output_size(
+        in_size: torch.Size,
+        out_channels: int,
+        stride: Tuple[int, int, int],
+        padding: Tuple[int, int, int],
+        dilation: Tuple[int, int, int],
+        kernel_size: list[int],
+    ) -> torch.Size:
+        N, C, D, H, W = in_size
+
+        dout = (D + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) // stride[
+            0
+        ] + 1
+        hout = (H + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) // stride[
+            1
+        ] + 1
+        wout = (W + 2 * padding[2] - dilation[2] * (kernel_size[2] - 1) - 1) // stride[
+            2
+        ] + 1
+
+        return torch.Size((N, out_channels, dout, hout, wout))
+
+    output_size = get_conv3d_output_size(
+        in_size, out_channels, stride, padding, dilation, kernel_size
     )
 
     return input.new_empty(output_size, dtype=input.dtype)
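
Note: the new conv1d/conv2d/conv3d fake kernels above only infer output shapes. A minimal sketch of the shape arithmetic they rely on, for the 2D case (assumption: the standard convolution output-size formula; expected_conv2d_size is a hypothetical helper written for illustration, not part of this diff):

    import torch

    def expected_conv2d_size(in_size, out_channels, stride, padding, dilation, kernel_size):
        # Per spatial dim: out = (in + 2*pad - dilation*(kernel - 1) - 1) // stride + 1
        n, _, h, w = in_size
        hout = (h + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) // stride[0] + 1
        wout = (w + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) // stride[1] + 1
        return (n, out_channels, hout, wout)

    # Sanity check against an eager conv2d with the same configuration.
    x = torch.randn(1, 3, 16, 16)
    w = torch.randn(8, 3, 3, 3)
    out = torch.nn.functional.conv2d(x, w, None, stride=(2, 2), padding=(1, 1), dilation=(1, 1))
    assert tuple(out.shape) == expected_conv2d_size(x.shape, 8, (2, 2), (1, 1), (1, 1), (3, 3))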

backends/cadence/aot/ref_implementations.py

Lines changed: 36 additions & 31 deletions
@@ -1334,48 +1334,53 @@ def quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ...
 def quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ...
 
 
-@impl_tracked(m, "convolution")
-def convolution(
+@impl_tracked(m, "conv1d")
+def conv1d(
+    input_tensor: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    stride: tuple[int],
+    padding: tuple[int],
+    dilation: tuple[int],
+    groups: int,
+) -> torch.Tensor:
+    conv_out = torch.nn.functional.conv1d(
+        input_tensor, weight, bias, stride[0], padding[0], dilation[0], groups
+    )
+
+    return conv_out
+
+
+@impl_tracked(m, "conv2d")
+def conv2d(
     input_tensor: torch.Tensor,
     weight: torch.Tensor,
     bias: torch.Tensor,
     stride: tuple[int, int],
     padding: tuple[int, int],
     dilation: tuple[int, int],
     groups: int,
-    channel_last: bool = False,
 ) -> torch.Tensor:
-    conv_is_1d = len(input_tensor.shape) == 3
-    if channel_last:
-        if conv_is_1d:
-            input_tensor = input_tensor.movedim(-1, 1).contiguous()
-            if len(weight.shape) != 3:
-                raise ValueError("Weight tensor must be 3D if input is 3D")
-            weight = weight.movedim(-1, 1).contiguous()
-        else:
-            input_tensor = input_tensor.movedim(-1, -3)
-            if len(weight.shape) != 4:
-                raise ValueError("Weight tensor must be 4D if input is nd > 3")
-            weight = torch.permute(weight, (0, -1, 1, 2)).contiguous()
+    conv_out = torch.nn.functional.conv2d(
+        input_tensor, weight, bias, stride, padding, dilation, groups
+    )
 
-    _stride: tuple[int, int] | int = stride
-    _padding: tuple[int, int] | int = padding
-    _dilation: tuple[int, int] | int = dilation
+    return conv_out
 
-    if conv_is_1d:
-        conv = torch.nn.functional.conv1d
-        _stride = stride[0]
-        _padding = padding[0]
-        _dilation = dilation[0]
-    else:
-        conv = torch.nn.functional.conv2d
 
-    conv_out = conv(input_tensor, weight, bias, _stride, _padding, _dilation, groups)
-    if channel_last:
-        if conv_is_1d:
-            conv_out = conv_out.movedim(1, -1).contiguous()
-        else:
-            conv_out = conv_out.movedim(-3, -1).contiguous()
+@impl_tracked(m, "conv3d")
+def conv3d(
+    input_tensor: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    stride: tuple[int, int, int],
+    padding: tuple[int, int, int],
+    dilation: tuple[int, int, int],
+    groups: int,
+) -> torch.Tensor:
+    conv_out = torch.nn.functional.conv3d(
+        input_tensor, weight, bias, stride, padding, dilation, groups
+    )
 
     return conv_out
 
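Note: with channel_last handling removed, the reference implementations above are thin wrappers over the corresponding torch.nn.functional ops on channel-first tensors. A small standalone sketch of what the new conv1d reference does in effect (plain torch only; it does not go through the cadence op registry):

    import torch

    x = torch.randn(2, 4, 32)   # (N, C, L): conv1d now requires a 3D, channel-first input
    w = torch.randn(8, 4, 5)    # (out_channels, in_channels, kernel_size)
    b = torch.zeros(8)

    # The registered kernel unwraps the single-element stride/padding/dilation
    # lists and defers to ATen's conv1d.
    stride, padding, dilation, groups = [2], [1], [1], 1
    out = torch.nn.functional.conv1d(x, w, b, stride[0], padding[0], dilation[0], groups)
    print(out.shape)  # torch.Size([2, 8, 15])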
backends/cadence/aot/replace_ops.py

Lines changed: 41 additions & 26 deletions
@@ -452,14 +452,16 @@ class ReplaceConvolutionOptionalArgsWithConcreteArgsPass(ExportPass):
     def call_operator(self, op, args, kwargs, meta):
         op_packet = get_edge_overload_packet(op)
         if op_packet not in {
-            exir_ops.edge.cadence.convolution,
+            exir_ops.edge.cadence.conv1d,
+            exir_ops.edge.cadence.conv2d,
+            exir_ops.edge.cadence.conv3d,
             exir_ops.edge.cadence.transposed_convolution,
         }:
             return super().call_operator(op, args, kwargs, meta)
 
         is_transposed = op_packet == exir_ops.edge.cadence.transposed_convolution
-        expected_args = 9 if is_transposed else 8
-        assert len(args) == expected_args
+        num_expected_args = 9 if is_transposed else 7
+        assert len(args) == num_expected_args
         # Check if the bias is already concrete
         if args[2] is not None:
             return super().call_operator(op, args, kwargs, meta)
@@ -684,20 +686,28 @@ def call_operator(self, op, args, kwargs, meta):
             output_padding,
             groups,
         ) = args
-        # Currently we only handle conversion to conv1d and conv2d, therefore
+        # Currently we only handle conversion to conv1d, conv2d, and conv3d, therefore
         # verify that the stride, padding, dilation, and output_padding have
-        # len <=2.
+        # len <=3.
         assert (
-            len(stride) == len(padding) == len(dilation) == len(output_padding) == 1
-        ) or (
-            len(stride) == len(padding) == len(dilation) == len(output_padding) == 2
-        ), "Can only map convolution to conv1d and conv2d at present"
-
-        target = (
-            exir_ops.edge.cadence.transposed_convolution.default
-            if transposed
-            else exir_ops.edge.cadence.convolution.default
-        )
+            (len(stride) == len(padding) == len(dilation) == len(output_padding) == 1)
+            or (
+                len(stride) == len(padding) == len(dilation) == len(output_padding) == 2
+            )
+            or (
+                len(stride) == len(padding) == len(dilation) == len(output_padding) == 3
+            )
+        ), "Can only map convolution to conv1d, conv2d, and conv3d at present"
+
+        # Determine if this is 1D, 2D, or 3D convolution based on parameter lengths
+        if transposed:
+            target = exir_ops.edge.cadence.transposed_convolution.default
+        elif len(stride) == 1:
+            target = exir_ops.edge.cadence.conv1d.default
+        elif len(stride) == 2:
+            target = exir_ops.edge.cadence.conv2d.default
+        else:  # len(stride) == 3
+            target = exir_ops.edge.cadence.conv3d.default
 
         if transposed:
             # Flip the height and width dimensions of weight, since we apply a
@@ -756,7 +766,6 @@ def call_operator(self, op, args, kwargs, meta):
                 padding,
                 dilation,
                 groups,
-                False,
             )
 
             return super().call_operator(target, new_args, kwargs, meta)
@@ -778,7 +787,9 @@ class ReplaceTrivialConvWithLinear(ExportPass):
     """
 
     trivial_conv_op_to_linear_op: Dict[EdgeOpOverload, EdgeOpOverload] = {
-        exir_ops.edge.cadence.convolution.default: exir_ops.edge.aten.linear.default,
+        exir_ops.edge.cadence.conv1d.default: exir_ops.edge.aten.linear.default,
+        exir_ops.edge.cadence.conv2d.default: exir_ops.edge.aten.linear.default,
+        exir_ops.edge.cadence.conv3d.default: exir_ops.edge.aten.linear.default,
         exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor: exir_ops.edge.cadence.quantized_linear.per_tensor,
         exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor: exir_ops.edge.cadence.quantized_linear.per_tensor,
     }
@@ -795,7 +806,7 @@ def call_operator(self, op, args, kwargs, meta):
             op == exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor
             or op == exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor
         )
-        assert (len(args) == 8 and not quantized_op) or (
+        assert (len(args) == 7 and not quantized_op) or (
            len(args) >= 12 and quantized_op
         ), "Inconsistent args for convolution"
         (in_tensor, weight, bias, stride, padding, dilation, groups) = args[0:7]
@@ -950,7 +961,9 @@ def call_operator(
         meta: NodeMetadata,
     ) -> ProxyValue:
         if op not in {
-            exir_ops.edge.cadence.convolution.default,
+            exir_ops.edge.cadence.conv1d.default,
+            exir_ops.edge.cadence.conv2d.default,
+            exir_ops.edge.cadence.conv3d.default,
             exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor,
         }:
             return super().call_operator(op, args, kwargs, meta)
@@ -961,11 +974,11 @@ def call_operator(
             # Already in NHWC layout.
             return super().call_operator(op, args, kwargs, meta)
 
-        new_op = (
-            exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor
-            if quantized_op
-            else exir_ops.edge.cadence.convolution.default
-        )
+        if quantized_op:
+            new_op = exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor
+        else:
+            # Determine if 1D or 2D convolution based on op
+            new_op = op
 
         input_proxy = cast(ProxyValue, args[0])
         weight_proxy = cast(ProxyValue, args[1])
@@ -1038,7 +1051,9 @@ class ReplaceConvWithIm2RowAndLinear(ExportPass):
     # A map from the convolution op to the linear op that it should
     # decompose to.
     conv_op_to_linear_op: Dict[EdgeOpOverload, EdgeOpOverload] = {
-        exir_ops.edge.cadence.convolution.default: exir_ops.edge.aten.linear.default,
+        exir_ops.edge.cadence.conv1d.default: exir_ops.edge.aten.linear.default,
+        exir_ops.edge.cadence.conv2d.default: exir_ops.edge.aten.linear.default,
+        exir_ops.edge.cadence.conv3d.default: exir_ops.edge.aten.linear.default,
         exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor: exir_ops.edge.cadence.quantized_linear.per_tensor,
         exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor: exir_ops.edge.cadence.quantized_linear.per_tensor,
     }
@@ -1052,7 +1067,7 @@ def call_operator(self, op, args, kwargs, meta):
             op == exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor
             or op == exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor
         )
-        assert (len(args) == 8 and not quantized_op) or (
+        assert (len(args) == 7 and not quantized_op) or (
            len(args) >= 12 and quantized_op
         ), "Inconsistent args for convolution"
         (in_tensor, weight, bias, stride, padding, dilation, groups) = args[0:7]
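
Note: the pass changes above replace the single generic convolution target with a rank-based dispatch. A standalone sketch of that selection rule (illustrative only: pick_cadence_conv_target is a hypothetical name, and the real pass returns edge-dialect op overloads rather than strings):

    def pick_cadence_conv_target(stride, padding, dilation, output_padding, transposed):
        # Transposed convolutions keep their dedicated op; otherwise the
        # spatial rank of the parameters decides between conv1d/conv2d/conv3d.
        assert len(stride) == len(padding) == len(dilation) == len(output_padding)
        assert len(stride) in (1, 2, 3), "only 1D/2D/3D convolutions are supported"
        if transposed:
            return "cadence::transposed_convolution"
        return {1: "cadence::conv1d", 2: "cadence::conv2d", 3: "cadence::conv3d"}[len(stride)]

    print(pick_cadence_conv_target([1, 1], [0, 0], [1, 1], [0, 0], False))  # cadence::conv2d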