[Frontend][PyTorch] Add: Relay stft operator (apache#11190)

Jian Sheng · Boblest Sebastian (ETAS-DEV/XPC-Fe1) · commit ff1a4fb8561f · 2022-05-27T11:42:18.000+02:00
* Add: Relay stft operator

* fix doc

* address PR comments

* address additional comments
diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h
@@ -536,6 +536,28 @@ struct EinsumAttrs : public tvm::AttrsNode<EinsumAttrs> {
   }
 };  // struct EinsumAttrs
 
+/*!
+ * \brief Attributes used in stft operator
+ *
+ * Only scalar settings are declared here; this struct has no field for the window.
+ */
+struct StftAttrs : public tvm::AttrsNode<StftAttrs> {
+  /*! \brief The size of Fourier transform (declared default: -1) */
+  int n_fft;
+  /*! \brief The distance between neighboring sliding window frames (declared default: -1) */
+  int hop_length;
+  /*! \brief The size of window frame and STFT filter (declared default: -1) */
+  int win_length;
+  /*! \brief Whether to return the normalized STFT results (declared default: false) */
+  bool normalized;
+  /*! \brief Whether to return onesided result or fill with conjugate symmetry (default: true) */
+  bool onesided;
+
+  // Registers each field with its default value and description string.
+  TVM_DECLARE_ATTRS(StftAttrs, "relay.attrs.StftAttrs") {
+    TVM_ATTR_FIELD(n_fft).set_default(-1).describe("The size of Fourier transform");
+    TVM_ATTR_FIELD(hop_length)
+        .set_default(-1)
+        .describe("The distance between neighboring sliding window frames");
+    TVM_ATTR_FIELD(win_length).set_default(-1).describe("The size of window frame and STFT filter");
+    TVM_ATTR_FIELD(normalized)
+        .set_default(false)
+        .describe("Whether to return the normalized STFT results");
+    TVM_ATTR_FIELD(onesided).set_default(true).describe(
+        "Whether to return onesided result or fill with conjugate symmetry");
+  }
+};  // struct StftAttrs
+
 }  // namespace relay
 }  // namespace tvm
 #endif  // TVM_RELAY_ATTRS_TRANSFORM_H_
diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py
@@ -277,7 +277,7 @@ def min_max_common(self, name_elemwise, name_reduce, inputs, input_types):
         if len(inputs) == 1:
             data = self.pytorch_promote_types(inputs[:1], input_types[:1])
             return get_relay_op(name_reduce)(data[0])
-        elif len(inputs) >= 2 and isinstance(inputs[1], int):
+        elif len(inputs) >= 2 and isinstance(inputs[1], (list, int)):
             data = self.pytorch_promote_types(inputs[:1], input_types[:1])
             dim = inputs[1]
             keepdims = inputs[2] if len(inputs) > 2 else False
@@ -2188,6 +2188,17 @@ def deform_conv2d(self, inputs, input_types):
 
         return _op.nn.bias_add(conv_out, bias)
 
+    def stft(self, inputs, input_types):
+        data = inputs[0]
+        n_fft = inputs[1]
+        hop_length = inputs[2]
+        win_length = inputs[3]
+        window = inputs[4]
+        normalized = inputs[5]
+        onesided = inputs[6]
+
+        return _op.stft(data, n_fft, hop_length, win_length, window, normalized, onesided)
+
     def unbind(self, inputs, input_types):
         data = inputs[0]
         axis = int(inputs[1])
@@ -2996,6 +3007,9 @@ def create_convert_map(self):
             "aten::sub": self.sub,
             "aten::max": self.max,
             "aten::min": self.min,
+            "aten::amax": self.max,
+            "aten::amin": self.min,
+            "aten::stft": self.stft,
             "aten::mul": self.make_elemwise("multiply"),
             "aten::pow": self.make_elemwise("power"),
             "aten::arange": self.arange,
diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py
@@ -140,6 +140,50 @@ def compute_reshape(attrs, inputs, output_type):
 
 _reg.register_strategy("sparse_reshape", strategy.sparse_reshape_strategy)
 
+# stft
+@_reg.register_compute("stft")
+def compute_stft(attrs, inputs, output_type):
+    """Compute definition of stft"""
+    return topi.stft(
+        inputs[0],
+        attrs.n_fft,
+        attrs.hop_length,
+        attrs.win_length,
+        attrs.window,
+        attrs.normalized,
+        attrs.onesided,
+        output_type.shape,
+    )
+
+
+_reg.register_strategy("stft", strategy.stft_strategy)
+
+
+# Shape helper for stft. Fills a 4-element int64 tensor with
+#   [data.shape[0], number of frequency rows, number of frames, 2].
+@script
+def _stft_shape_func(data, n_fft, hop_length, onesided):
+    output_shape = output_tensor((4,), "int64")
+    # First output dimension is copied from the first dimension of data.
+    output_shape[0] = int64(data.shape[0])
+    # Frequency rows: n_fft // 2 + 1 when onesided, otherwise all n_fft rows.
+    if onesided:
+        output_shape[1] = int64(int64(n_fft) // int64(2)) + int64(1)
+    else:
+        output_shape[1] = int64(n_fft)
+    # Frame count: (data.shape[1] - n_fft) // hop_length + 1.
+    # NOTE(review): data.shape[1] is read, so data is expected to have a second axis here.
+    output_shape[2] = int64(int64(data.shape[1] - n_fft) // int64(hop_length)) + int64(1)
+    # Last dimension is fixed at 2.
+    output_shape[3] = int64(2)
+    return output_shape
+
+
+@_reg.register_shape_func("stft", True)
+def stft_shape_func(attrs, inputs, _):
+    """
+    Shape func for stft.
+    """
+    return [
+        _stft_shape_func(
+            inputs[0], convert(attrs.n_fft), convert(attrs.hop_length), convert(attrs.onesided)
+        )
+    ]
+
+
 # scatter_add
 @_reg.register_compute("scatter_add")
 def compute_scatter_add(attrs, inputs, output_type):
diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
@@ -1313,3 +1313,14 @@ def einsum_strategy_cuda(attrs, inputs, out_type, target):
         name="einsum.cuda",
     )
     return strategy
+
+
+@stft_strategy.register(["cuda", "gpu"])
+def stft_strategy_cuda(attrs, inputs, out_type, target):
+    strategy = _op.OpStrategy()
+    strategy.add_implementation(
+        wrap_compute_stft(topi.cuda.stft),
+        wrap_topi_schedule(topi.generic.schedule_extern),
+        name="stft.cuda",
+    )
+    return strategy
diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py
@@ -1375,6 +1375,39 @@ def _compute_sparse_reshape(attrs, inputs, output_type):
     return _compute_sparse_reshape
 
 
+# stft
+@override_native_generic_func("stft_strategy")
+def stft_strategy(attrs, outs, out_type, target):
+    """stft generic strategy"""
+    strategy = _op.OpStrategy()
+    strategy.add_implementation(
+        wrap_compute_stft(topi.stft),
+        wrap_topi_schedule(topi.generic.schedule_extern),
+        name="stft.generic",
+    )
+    return strategy
+
+
+def wrap_compute_stft(topi_compute):
+    """Wrap stft compute"""
+
+    def _compute_stft(attrs, inputs, output_type):
+        return [
+            topi_compute(
+                inputs[0],
+                attrs.n_fft,
+                attrs.hop_length,
+                attrs.win_length,
+                inputs[1],
+                attrs.normalized,
+                attrs.onesided,
+                output_type.shape,
+            )
+        ]
+
+    return _compute_stft
+
+
 # roi_pool
 @generic_func
 def schedule_roi_pool(attrs, outs, target):
diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py
@@ -1829,3 +1829,63 @@ def invert_permutation(data):
         relay.invert_permutation(data) = [2, 4, 3, 0, 1]
     """
     return _make.invert_permutation(data)
+
+
+def stft(
+    data, n_fft, hop_length=None, win_length=None, window=None, normalized=False, onesided=True
+):
+    """
+    The STFT computes the Fourier transform of short overlapping windows of the input.
+    This gives frequency components of the signal as they change over time.
+
+    Parameters
+    ----------
+    data : relay.Expr
+        Either a 1-D tensor or a 2-D batch tensor.
+
+    n_fft : int
+        The size of Fourier transform
+
+    hop_length : int, optional
+        The distance between neighboring sliding window frames. If is None,
+        it is treated as equal to floor(n_fft / 4).
+
+    win_length : int, optional
+        The size of window frame and STFT filter. If is None, it is treated as equal to n_fft.
+
+    window : relay.Expr, optional
+        A 1-D tensor window frame. If is None (default), it is treated as if
+        having 1 everywhere in the window.
+
+    normalized : bool, optional
+        Whether to return the normalized STFT results. Default value is False.
+
+    onesided : bool, optional
+        Whether to return onesided result or fill with conjugate symmetry. Default value is True.
+
+    Returns
+    -------
+    output : relay.Expr
+        Tensor containing the STFT result with shape [batch, N, T, 2], where N is the
+        number of frequencies where STFT is applied and T is the total number of frames used.
+
+    Examples
+    --------
+    .. code-block:: python
+
+        data = [1, 2, 3, 4, 5, 6]
+        window = [4, 3, 2]
+        [n_fft, hop_length, win_length, normalized, onesided] = [3, 3, 3, False, True]
+        relay.stft(data, n_fft, hop_length, win_length, window, normalized, onesided)
+        -> [[[15.0000,  0.0000], [34.0000,  0.0000]], [[ 4.5000,  0.8660], [ 1.0000, -1.7321]]]
+    """
+    if hop_length is None:
+        hop_length = n_fft // 4
+
+    if win_length is None:
+        win_length = n_fft
+
+    if window is None:
+        window = _make.ones([n_fft], "int32")
+
+    return _make.stft(data, n_fft, hop_length, win_length, window, normalized, onesided)
diff --git a/python/tvm/topi/__init__.py b/python/tvm/topi/__init__.py
@@ -46,6 +46,7 @@
 from .einsum import *
 from .unique import *
 from .searchsorted import *
+from .stft import *
 from . import generic
 from . import nn
 from . import x86
diff --git a/python/tvm/topi/cuda/__init__.py b/python/tvm/topi/cuda/__init__.py
@@ -60,3 +60,4 @@
 from .transform import *
 from .unique import *
 from .searchsorted import *
+from .stft import *
diff --git a/python/tvm/topi/cuda/stft.py b/python/tvm/topi/cuda/stft.py
@@ -0,0 +1,135 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks, unused-argument
+"""STFT operator"""
+from math import pi
+import tvm
+from tvm import te, tir
+from ..utils import ceil_div
+
+
+def _get_max_threads(batch_row):
+    max_threads = tvm.target.Target.current(allow_none=False).max_num_threads
+    return tir.min(batch_row, max_threads)
+
+
+def stft(
+    data,
+    n_fft,
+    hop_length,
+    win_length,
+    window,
+    normalized,
+    onesided,
+    output_shape,
+):
+    """
+    The STFT computes the Fourier transform of short overlapping windows of the input.
+    This gives frequency components of the signal as they change over time.
+
+    Parameters
+    ----------
+    data : te.Tensor
+        The input signal. The kernel indexes it with two indices, (batch, position).
+        NOTE(review): a 1-D input is therefore presumably expanded by the caller - confirm.
+
+    n_fft : int
+        The size of Fourier transform
+
+    hop_length : int
+        The distance between neighboring sliding window frames
+
+    win_length : int
+        The size of window frame and STFT filter
+
+    window : te.Tensor
+        A 1-D tensor window frame
+
+    normalized : bool
+        Whether to return the normalized STFT results
+
+    onesided : bool
+        Whether to return onesided result or fill with conjugate symmetry.
+        The kernel itself does not read this flag; the number of rows it computes
+        comes from ``output_shape``.
+
+    output_shape : sequence of 4 extents
+        Shape of the result. The kernel uses the first three extents as
+        (batch, row, col) and writes indices 0 and 1 of the last axis.
+
+    Returns
+    -------
+    output : te.Tensor
+        Tensor containing the STFT result, with shape ``output_shape`` and the dtype of ``data``
+
+    Examples
+    --------
+    .. code-block:: python
+
+        data = [1, 2, 3, 4, 5, 6]
+        window = [4, 3, 2]
+        [n_fft, hop_length, win_length, normalized, onesided] = [3, 3, 3, False, True]
+        relay.stft(data, n_fft, hop_length, win_length, window, normalized, onesided)
+        -> [[[15.0000,  0.0000], [34.0000,  0.0000]], [[ 4.5000,  0.8660], [ 1.0000, -1.7321]]]
+    """
+
+    def gen_ir(
+        data_ptr,
+        n_fft,
+        hop_length,
+        win_length,
+        window_ptr,
+        normalized,
+        onesided,
+        output_ptr,
+    ):
+        """Build the TIR kernel; one thread handles one (batch, row, col) output entry."""
+        ib = tir.ir_builder.create()
+        data = ib.buffer_ptr(data_ptr)
+        window = ib.buffer_ptr(window_ptr)
+        output = ib.buffer_ptr(output_ptr)
+        # Threads per block: min(batch * rows, target max_num_threads).
+        max_threads = _get_max_threads(output_ptr.shape[0] * output_ptr.shape[1])
+        # Total number of (batch, row, col) entries; the trailing axis of size 2 is excluded.
+        output_size = output_ptr.shape[0] * output_ptr.shape[1] * output_ptr.shape[2]
+        with ib.new_scope():
+            nthread_tx = max_threads
+            nthread_bx = ceil_div(output_size, max_threads)
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+            # Flat index of this thread across all blocks.
+            tid = bx * max_threads + tx
+
+            # ceil_div may launch more threads than entries; the surplus ones do nothing.
+            with ib.if_scope(tid < output_size):
+                # Decode the flat index into (batch, row, col).
+                matrix_size = output_ptr.shape[1] * output_ptr.shape[2]
+                batch = tir.floordiv(tid, matrix_size)
+                row = tir.floordiv(tir.indexmod(tid, matrix_size), output_ptr.shape[2])
+                col = tir.indexmod(tir.indexmod(tid, matrix_size), output_ptr.shape[2])
+                # Zero both accumulators before summing over the window.
+                output[batch, row, col, 0] = tir.Cast(data_ptr.dtype, 0)
+                output[batch, row, col, 1] = tir.Cast(data_ptr.dtype, 0)
+                with ib.for_range(0, win_length) as wlen:
+                    # Index 0 accumulates the cosine term (real part) and index 1
+                    # subtracts the sine term (imaginary part) for frequency `row`
+                    # of the frame starting at col * hop_length.
+                    output[batch, row, col, 0] += (
+                        window[wlen]
+                        * data[batch, col * hop_length + wlen]
+                        * tir.cos(2 * pi * row * wlen / win_length)
+                    )
+                    output[batch, row, col, 1] -= (
+                        window[wlen]
+                        * data[batch, col * hop_length + wlen]
+                        * tir.sin(2 * pi * row * wlen / win_length)
+                    )
+                # Optional normalization: divide both parts by sqrt(n_fft).
+                with ib.if_scope(normalized):
+                    output[batch, row, col, 0] /= tir.sqrt(tir.const(n_fft, "float32"))
+                    output[batch, row, col, 1] /= tir.sqrt(tir.const(n_fft, "float32"))
+
+        return ib.get()
+
+    # Output buffer uses the same dtype as the input data.
+    output_buf = tir.decl_buffer(output_shape, data.dtype, "output_buf")
+
+    # ins[0] / ins[1] are the buffers for data / window, outs[0] the output buffer.
+    return te.extern(
+        output_shape,
+        [data, window],
+        lambda ins, outs: gen_ir(
+            ins[0], n_fft, hop_length, win_length, ins[1], normalized, onesided, outs[0]
+        ),
+        dtype=[data.dtype],
+        out_buffers=[output_buf],
+        name="stft_cuda",
+        tag="stft_cuda",
+    )
diff --git a/python/tvm/topi/stft.py b/python/tvm/topi/stft.py
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc
diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py