
Commit 8418026

[FQ2I] Add leaky relu to FQ2I (#10378)

Authored by Margaret Qian (margaretqian)

* add leaky relu op + passing unit test
* passing test
* format
* clean up
* leaky relu qnn op
* wip
* qnn op
* add comment
* lint

Co-authored-by: Margaret Qian <[email protected]>
1 parent 7d5ef84 · commit 8418026

File tree

5 files changed: +244, -0 lines changed

python/tvm/relay/qnn/op/qnn.py

Lines changed: 27 additions & 0 deletions

@@ -1050,3 +1050,30 @@ def batch_matmul(x, y, x_zero_point, y_zero_point, x_scale, y_scale, out_dtype="
 # register fuse pattern for qnn ops
 reg.register_pattern("qnn.quantize", OpPattern.OPAQUE)
 reg.register_pattern("qnn.dequantize", OpPattern.OPAQUE)
+
+
+def leaky_relu(x, alpha, scale, zero_point):
+    """Quantized leaky relu.
+
+    Parameters
+    ----------
+    x : relay.Expr
+        The quantized input tensor.
+    alpha : float
+        The alpha value.
+    scale : relay.Expr
+        The scale of the quantized expr.
+    zero_point : relay.Expr
+        The zero point of the quantized expr.
+
+    Returns
+    -------
+    result : relay.Expr
+        The computed result.
+    """
+    return _make.leaky_relu(
+        x,
+        alpha,
+        scale,
+        zero_point,
+    )
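
For a sense of how the new API is used, here is a minimal sketch mirroring the unit test added later in this commit (the scale, zero point, and alpha values are just the test's illustrative constants):

import tvm
from tvm import relay

# A quantized uint8 input with scale 0.125 and zero point 60.
x = relay.var("x", shape=(1, 4), dtype="uint8")
y = relay.qnn.op.leaky_relu(
    x=x,
    alpha=0.9,
    scale=relay.const(0.125, "float32"),
    zero_point=relay.const(60, "int32"),
)

mod = tvm.IRModule.from_expr(relay.Function([x], y))
mod = relay.transform.InferType()(mod)
# CanonicalizeOps lowers qnn.leaky_relu to integer-only Relay ops
# (the lowering lives in src/relay/qnn/op/leaky_relu.cc below).
mod = relay.qnn.transform.CanonicalizeOps()(mod)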

python/tvm/relay/transform/fake_quantization_to_integer.py

Lines changed: 10 additions & 0 deletions

@@ -346,6 +346,16 @@ def relu(expr, type_map):
     return [relay.op.maximum(arg, fold_constant(zero)), t]


+@register_fake_quantization_to_integer("nn.leaky_relu")
+def leaky_relu(expr, type_map):
+    """Rewrite a leaky relu op"""
+    arg = expr.args[0]
+    t = type_map[arg]
+    alpha = expr.attrs.alpha
+    output = relay.qnn.op.leaky_relu(arg, alpha, t.scale, t.zero_point)
+    return [output, t]
+
+
 @register_fake_quantization_to_integer("nn.pad")
 def pad(expr, type_map):
     """Rewrite an nn.pad op"""
src/relay/qnn/op/leaky_relu.cc

Lines changed: 130 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file src/relay/qnn/op/leaky_relu.cc
 * \brief QNN leaky relu operator.
 */
#include <tvm/relay/analysis.h>
#include <tvm/relay/op_attr_types.h>

#include "op_common.h"

namespace tvm {
namespace relay {
namespace qnn {

bool QnnLeakyReluRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
                     const TypeReporter& reporter) {
  // Expected Types: data, scale, zero_point, out_type
  ICHECK_EQ(types.size(), 4);
  const auto* x = types[0].as<TensorTypeNode>();
  if (x == nullptr) return false;
  ICHECK(x->dtype == DataType::Int(8) || x->dtype == DataType::UInt(8))
      << "Expected quantized leaky_relu type(int8, uint8) for input but was " << x->dtype;
  const auto* param = attrs.as<LeakyReluAttrs>();
  ICHECK(param != nullptr) << "LeakyReluAttrs cannot be nullptr.";

  // Check the types of scale and zero points.
  for (size_t i = 1; i < 3; ++i) {
    if (types[i].as<IncompleteTypeNode>()) {
      return false;
    }
  }

  ICHECK(IsScalarType(types[1], DataType::Float(32)));  // scale
  ICHECK(IsScalarType(types[2], DataType::Int(32)));    // zero_point

  // Assign types for scale and zero points.
  reporter->Assign(types[1], TensorType({}, DataType::Float(32)));  // scale
  reporter->Assign(types[2], TensorType({}, DataType::Int(32)));    // zero_point

  // Collect the input tensor and output tensor devoid of scale and zero points to reuse Relay
  // IdentityRel infer type function.
  Array<Type> tensor_types = {types[0], types[3]};
  return IdentityRel(tensor_types, 2, attrs, reporter);
}

// Positional relay function to create quantized leaky relu operator used by frontend FFI.
Expr MakeQuantizedLeakyRelu(Expr x, double alpha, Expr scale, Expr zero_point) {
  auto attrs = make_object<LeakyReluAttrs>();
  attrs->alpha = alpha;
  static const Op& op = Op::Get("qnn.leaky_relu");
  return Call(op, {x, scale, zero_point}, Attrs(attrs), {});
}

/*
 * \brief Canonicalizes the QNN leaky relu op.
 * \param attrs The leaky relu attributes (alpha).
 * \param new_args The new mutated args to the call node.
 * \param arg_types The types of input and output.
 * \return The sequence of Relay ops for leaky relu op.
 */
Expr QnnLeakyReluCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
                              const Array<tvm::relay::Type>& arg_types) {
  // We rely on fixed point arithmetic to preserve the precision of multiplication
  // by a small alpha value < 1.
  //
  // We assume the same scale and zero point for alpha and the input tensor.
  // Let T = s(q_t - z) where q_t is the input arg[0].
  // Then, the quantized value of alpha * T is:
  //   q(a * T, s, z) = [(a * T) / s] + z = a * s(q_t - z) / s + z = a * (q_t - z) + z
  //                  = a * q_t + (1 - a) * z
  //
  // We return the quantized value of alpha * T for all values q_t < input_zero_point.

  ICHECK_EQ(new_args.size(), 3);
  Expr quantized_data = Cast(new_args[0], DataType::Int(32));
  Expr input_zero_point = Cast(new_args[2], DataType::Int(32));

  const auto* q_attrs = attrs.as<LeakyReluAttrs>();
  auto alpha = q_attrs->alpha;

  int32_t fixed_point_multiplier, shift;
  std::tie(fixed_point_multiplier, shift) = GetFixedPointMultiplierShift(alpha);
  auto prod = FixedPointMultiply(quantized_data, fixed_point_multiplier, shift);

  int32_t fixed_point_multiplier_z, shift_z;
  std::tie(fixed_point_multiplier_z, shift_z) = GetFixedPointMultiplierShift(1 - alpha);
  auto scaled_z = FixedPointMultiply(input_zero_point, fixed_point_multiplier_z, shift_z);

  auto add = Add(prod, scaled_z);
  auto output = Where(Less(quantized_data, input_zero_point), add, quantized_data);

  const auto* input_type = arg_types[0].as<TensorTypeNode>();
  return ConvertDtype(output, input_type->dtype);
}

RELAY_REGISTER_OP("qnn.leaky_relu")
    .describe("Leaky relu for quantized tensors.")
    .set_attrs_type<LeakyReluAttrs>()
    .set_num_inputs(3)
    .add_argument("data", "Quantized Tensor", "The input data.")
    .add_argument("scale", "Tensor", "The quantization scale of the input tensor.")
    .add_argument("zero_point", "Tensor", "The quantization zero_point of the input tensor.")
    .set_support_level(11)
    .add_type_rel("QLeakyRelu", QnnLeakyReluRel)
    .set_attr<TNonComputational>("TNonComputational", true)
    .set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnLeakyReluCanonicalize);

TVM_REGISTER_GLOBAL("relay.qnn.op._make.leaky_relu").set_body_typed(MakeQuantizedLeakyRelu);

}  // namespace qnn
}  // namespace relay
}  // namespace tvm
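
The identity derived in the canonicalization comment, q(a * T, s, z) = a * q_t + (1 - a) * z, is easy to sanity-check numerically. A quick NumPy sketch (not part of the commit; plain floating point stands in for the two FixedPointMultiply steps):

import numpy as np

s, z, a = 0.125, 60, 0.9                 # scale, zero point, alpha
q_t = np.array([255, 133, 0, 9])         # quantized input values

T = s * (q_t - z)                        # dequantized input
lhs = np.around(a * T / s + z)           # quantize alpha * T directly
rhs = np.around(a * q_t + (1 - a) * z)   # the simplified integer-domain form
np.testing.assert_array_equal(lhs, rhs)  # identical, as derived

# The op applies the scaled branch only where q_t < z; otherwise q_t passes through.
out = np.where(q_t < z, rhs, q_t)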
tests/python/relay/test_op_qnn_leaky_relu.py

Lines changed: 65 additions & 0 deletions
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import tvm
import numpy as np
from tvm import relay


def dequantize(data, scale, zp):
    return scale * (np.asarray(data) - zp)


def generate_golden_output(x_data, dequantized_x, alpha, scale, zero_point):
    prod = np.multiply(dequantized_x, alpha)
    prod = np.around(prod / scale + zero_point)

    output = np.where(x_data < zero_point, prod, x_data)
    return output


def test_qnn_leaky_relu():
    data_dtype = "uint8"
    scale = 0.125
    zero_point = 60
    alpha = 0.9

    x = relay.var("x", shape=(1, 4), dtype=data_dtype)
    y = relay.qnn.op.leaky_relu(
        x=x,
        alpha=alpha,
        scale=relay.const(scale, "float32"),
        zero_point=relay.const(zero_point, "int32"),
    )

    func = relay.Function([x], y)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_data = np.array((255, 133, 0, 9)).reshape((1, 4))
    x_dequantized = dequantize(x_data, scale, zero_point)
    golden_output = generate_golden_output(x_data, x_dequantized, alpha, scale, zero_point)

    op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)(x_data)

    np.testing.assert_equal(op_res.numpy(), golden_output)


if __name__ == "__main__":
    test_qnn_leaky_relu()
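
Working the numbers by hand for this test: with zero point 60, the inputs 255 and 133 sit above the zero point and pass through unchanged, while 0 becomes around(0.9 * 0 + 0.1 * 60) = 6 and 9 becomes around(0.9 * 9 + 0.1 * 60) = 14, so the golden output is [255, 133, 6, 14].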

tests/python/relay/test_pass_fake_quantization_to_integer.py

Lines changed: 12 additions & 0 deletions

@@ -551,6 +551,18 @@ def test_fake_quantize_relu_per_channel():
     compare_fq_to_int(op, [x_np])


+def test_fake_quantize_leaky_relu():
+    x = relay.var("x", shape=[1, 3, 224, 224], dtype="uint8")
+
+    x = relay.qnn.op.dequantize(x, relay.const(2.0), relay.const(114))
+    op = relay.op.nn.leaky_relu(x, 0.1)
+    op = relay.qnn.op.quantize(op, relay.const(2.0), relay.const(114), out_dtype="uint8")
+
+    x_np = np.random.randint(0, 255, size=[1, 3, 224, 224], dtype="uint8")
+
+    compare_fq_to_int(op, [x_np], True)
+
+
 @pytest.mark.parametrize(
     "operator",
     [relay.op.add, relay.op.multiply, relay.op.subtract, relay.op.minimum, relay.op.maximum],
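
One note on the assertion: the trailing True passed to compare_fq_to_int switches the helper into its rounding-tolerance mode, accepting off-by-one differences between the integer qnn path and the float reference; that tolerance matters here because the leaky relu lowering rounds through two fixed-point multiplies.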
