Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/legalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -1577,6 +1577,88 @@ def __call__(self, *args, **kwargs):
pass


class FullyConnectedRewriter(DFPatternCallback):
    """Legalize Fully Connected (with bias and clip) to an EthosU operator

    The matched composite call is replaced by an ``ethosu_conv2d`` call with a
    1x1 kernel. Reshapes are inserted around it whenever the IFM / OFM are not
    already 4D with their two middle dimensions equal to 1.
    """

    def __init__(self):
        super().__init__(require_type=True)
        # Match a call whose callee carries the fully connected "Composite"
        # attribute, applied to a single arbitrary argument (used as the IFM).
        self.pattern = (
            wildcard().has_attr({"Composite": ethosu_patterns.FullyConnectedParams.composite_name})
        )(wildcard())

    def callback(self, pre, post, node_map):
        """Build the replacement expression for one matched composite call.

        Parameters
        ----------
        pre
            Not used by this callback.
        post
            The matched call. ``post.op.body`` is parsed with
            ``FullyConnectedParams`` and ``post.args[0]`` is used as the IFM.
        node_map
            Not used by this callback.

        Returns
        -------
        The ``ethosu_conv2d`` call, reshaped to ``params.ofm.shape`` when the
        OFM is not already 4D with its two middle dimensions equal to 1.
        """
        params = ethosu_patterns.FullyConnectedParams(post.op.body)
        params.ifm.tensor = post.args[0]

        # IFM reshapes
        ifm = post.args[0]
        if len(params.ifm.shape) != 4 or not params.ifm.shape[1] == params.ifm.shape[2] == 1:
            # The batch is spelled out as 1 rather than inferred with -1: per
            # the review of this change the NPU does not support an IFM batch
            # size other than 1, and such a fully connected would not have
            # been offloaded in the first place.
            ifm = relay.reshape(ifm, (1, 1, 1, params.ifm.shape[-1]))

        # Weight transformations
        weights_values = params.weights.values
        weights_values_ohwi = np.expand_dims(weights_values, axis=(1, 2))

        if params.activation:
            # clip is the only activation the fully connected pattern can
            # match, so no lookup table is needed to translate its name.
            activation = "CLIP"
            clip_min = int(params.activation.attrs.a_min)
            clip_max = int(params.activation.attrs.a_max)
        else:
            activation = "NONE"
            clip_min = 0
            clip_max = 0

        scale_bias = vela_api.pack_biases(
            biases=params.biases.tensor.data.asnumpy(),
            ifm_scale=params.ifm.q_params.scale_f32,
            ifm_dtype=np.dtype(params.ifm.dtype),
            weight_scales=params.weights.q_params.scale_f32,
            ofm_scale=params.ofm.q_params.scale_f32,
            is_activation_tanh_or_sigmoid=False,
        )
        ethosu_fc = ethosu_ops.ethosu_conv2d(
            ifm=ifm,
            weight=relay.const(weights_values_ohwi, params.weights.values.dtype),
            scale_bias=relay.const(scale_bias, "uint8"),
            lut=relay.const([], dtype="int8"),
            ifm_scale=float(params.ifm.q_params.scale_f32),
            ifm_zero_point=int(params.ifm.q_params.zero_point),
            weight_zero_point=int(params.weights.q_params.zero_point),
            ofm_scale=float(params.ofm.q_params.scale_f32),
            ofm_zero_point=int(params.ofm.q_params.zero_point),
            kernel_shape=[1, 1],
            ofm_channels=params.weights.shape[0],
            strides=(1, 1),
            padding=(0, 0, 0, 0),
            dilation=(1, 1),
            activation=activation,
            clip_min=clip_min,
            clip_max=clip_max,
            upscale="NONE",
            ifm_layout="NHWC",
            ofm_layout="NHWC",
        )

        if len(params.ofm.shape) != 4 or not params.ofm.shape[1] == params.ofm.shape[2] == 1:
            # NOTE(review): a reviewer suspected no test exercises this branch.
            # LegalizeEthosU runs LegalizeFullyConnected after LegalizeNoOps,
            # so this trailing reshape is not followed by an identity op and
            # may fail later in TE -- confirm the pass ordering.
            ethosu_fc = relay.reshape(ethosu_fc, params.ofm.shape)
        return ethosu_fc


@ir.transform.module_pass(opt_level=1)
class LegalizeFullyConnected:
    """This is the pass that wraps the FullyConnectedRewriter"""

    def transform_module(
        self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
    ) -> tvm.ir.IRModule:
        """Rewrite every function of ``mod`` with FullyConnectedRewriter.

        Each function is passed through ``rewrite`` and written back under
        its original global var; the same (updated) module is returned.
        ``ctx`` is not used.
        """
        for global_var, func in mod.functions.items():
            func = rewrite(FullyConnectedRewriter(), func)
            mod.update_func(global_var, func)
        return mod

    def __call__(self, *args, **kwargs):
        # Intentionally empty. LegalizeEthosU invokes this class as
        # ``LegalizeFullyConnected()(mod)`` and uses the result as a module,
        # so the real call behaviour presumably comes from the module_pass
        # decorator -- TODO confirm.
        pass


@ir.transform.module_pass(opt_level=1)
class LegalizeEthosU:
"""This is the pass to call graph-rewrites to perform graph transformation
Expand Down Expand Up @@ -1615,6 +1697,7 @@ def transform_module(
mod = LegalizeReshape()(mod)
mod = LegalizeStridedSlice()(mod)
mod = LegalizeNoOps()(mod)
mod = LegalizeFullyConnected()(mod)
return mod

def __call__(self, *args, **kwargs):
Expand Down
14 changes: 14 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,20 @@ class DequantizeArgs(Enum):
IFM_ZERO_POINT = 2


class QDenseArgs(Enum):
    """
    This is a helper enum to access the correct index of
    qnn.dense arguments
    """

    # Each value is a position in the ``args`` list of a qnn.dense call and is
    # meant to be used as ``call.args[QDenseArgs.<MEMBER>.value]``.
    # Member names are upper-case; lower-case spellings do not exist.
    IFM = 0
    WEIGHTS = 1
    IFM_ZERO_POINT = 2
    WEIGHTS_ZERO_POINT = 3
    IFM_SCALE = 4
    WEIGHTS_SCALE = 5


def is_composite_func(func: relay.Function, name: str) -> bool:
"""
This method checks whether the call is to
Expand Down
105 changes: 105 additions & 0 deletions python/tvm/relay/op/contrib/ethosu.py
Original file line number Diff line number Diff line change
Expand Up @@ -1537,6 +1537,106 @@ def squeeze_pattern():
return is_op("squeeze")(wildcard())


class FullyConnectedParams:
    """
    This class will parse a call to an ethos-u.fully_connected composite
    function and extract the parameter information.

    The composite body is read from the outside in as
    ``[clip] <- qnn.requantize <- [nn.bias_add] <- qnn.dense``, where the
    bracketed operators are optional.

    Attributes
    ----------
    weights, ifm, ofm : TensorParams
        Tensor descriptions built from the qnn.dense / qnn.requantize args.
    biases : TensorParams or None
        None when the body contains no nn.bias_add.
    activation
        The clip call when one is present, otherwise None.
    """

    composite_name = "ethosu.fully_connected"
    # Review nit: this mapping is not expected to grow beyond clip. It is kept
    # as a class attribute so existing references to it keep working.
    activation_map = {"clip": "CLIP"}

    @requires_vela
    def __init__(self, func_body):
        from tvm.relay.backend.contrib.ethosu.util import QDenseArgs  # type: ignore
        from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs
        from tvm.relay.backend.contrib.ethosu.util import RequantArgs

        activation = None
        if str(func_body.op) in self.activation_map:
            activation = func_body
            requantize_op = activation.args[0]
        else:
            requantize_op = func_body

        # qnn_fc_pattern lets qnn.requantize consume either nn.bias_add or
        # qnn.dense directly, so the bias must be treated as optional here.
        producer = requantize_op.args[0]
        if str(producer.op) == "nn.bias_add":
            bias_add = producer
            qnn_dense = bias_add.args[0]
        else:
            bias_add = None
            qnn_dense = producer

        # We consider the weights & biases as params as they should be constant
        # (QDenseArgs members are upper-case: IFM, WEIGHTS, ...)
        self.weights = TensorParams(
            qnn_dense.args[QDenseArgs.WEIGHTS.value],
            "OI",
            qnn_dense.args[QDenseArgs.WEIGHTS_SCALE.value],
            qnn_dense.args[QDenseArgs.WEIGHTS_ZERO_POINT.value],
        )
        if bias_add is None:
            self.biases = None
        else:
            self.biases = TensorParams(
                bias_add.args[BiasAddArgs.BIASES.value],
                None,
                requantize_op.args[RequantArgs.IFM_SCALE.value],
                requantize_op.args[RequantArgs.IFM_ZERO_POINT.value],
            )
        self.ifm = TensorParams(
            qnn_dense.args[QDenseArgs.IFM.value],
            None,
            qnn_dense.args[QDenseArgs.IFM_SCALE.value],
            qnn_dense.args[QDenseArgs.IFM_ZERO_POINT.value],
        )
        self.ofm = TensorParams(
            func_body,
            None,
            requantize_op.args[RequantArgs.OFM_SCALE.value],
            requantize_op.args[RequantArgs.OFM_ZERO_POINT.value],
        )

        self.activation = activation

    def is_valid(self):
        """
        Checks whether Fully Connected has compatible attributes with HW

        Returns
        -------
        bool
            True when every check below passes, False otherwise.
        """

        def check_weights_fc(weights):
            """Checks whether weight tensor is compatible with HW"""
            weights_limit = 127 * 65536
            # A saturation upper bound check for accumulators.
            # The zero-point-corrected weights live in a local array so that
            # validating never alters ``weights.values``, and the arithmetic
            # is widened to int64 so neither the subtraction nor the absolute
            # value can wrap around in the weights' own narrow dtype.
            centred = np.asarray(weights.values, dtype=np.int64) - weights.q_params.zero_point
            sum_weights = np.amax(np.sum(np.absolute(centred), axis=1))
            return bool(sum_weights <= weights_limit)

        if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8]):
            return False
        if not check_weights_fc(self.weights):
            return False
        if self.biases is None:
            # FullyConnectedRewriter.callback reads params.biases.tensor
            # unconditionally, so a dense without nn.bias_add cannot be
            # legalized; reject it here instead of failing during legalization.
            return False
        if not check_bias(self.biases):
            return False
        if not check_batch_size(self.ifm):
            return False
        # Check input shape
        if len(self.ifm.shape) < 2:
            return False
        if not np.all(np.array(self.ifm.shape[:-1]) == 1):
            # As we reshape the ifm from
            # [n0, n1, ... , n_m] to [n0 * n1 * ... * n_{m-1}, n_m]
            # all except the last dims need to be 1.
            # NOTE(review): a reviewer considered this check redundant given
            # check_batch_size above and an IFM that must be 2D -- confirm
            # before removing it.
            return False
        return True


def qnn_fc_pattern():
    """Create the pattern for a quantized fully connected layer.

    The pattern is ``qnn.dense`` (arbitrary input, five constant operands),
    optionally followed by ``nn.bias_add`` with a constant bias, then
    ``qnn.requantize`` with four constant operands, optionally followed by
    ``clip``.
    """
    qnn_dense = is_op("qnn.dense")(wildcard(), *(is_constant() for _ in range(5)))
    with_bias = is_op("nn.bias_add")(qnn_dense, is_constant())
    # NOTE(review): requantize may consume the dense output directly, i.e. the
    # bias is optional in this pattern. A reviewer flagged that legalization
    # falls over when no bias is present and pointed at QnnTransposeConv2dParams
    # as an example of optional-bias handling -- confirm that
    # FullyConnectedParams copes with a bias-less match.
    requantize = is_op("qnn.requantize")(
        qnn_dense | with_bias, *(is_constant() for _ in range(4))
    )
    return requantize.optional(is_op("clip"))


@register_pattern_table("ethos-u")
def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Callable]]:
return [
Expand Down Expand Up @@ -1652,6 +1752,11 @@ def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Cal
squeeze_pattern(),
lambda pat: SqueezeParams(pat).is_valid(),
),
(
FullyConnectedParams.composite_name,
qnn_fc_pattern(),
lambda pat: FullyConnectedParams(pat).is_valid(),
),
]


Expand Down
21 changes: 21 additions & 0 deletions tests/python/contrib/test_ethosu/test_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1167,5 +1167,26 @@ def leaky_relu_func(x):
_compare_tvm_with_tflite(leaky_relu_func, [ifm_shape], accel_type)


@pytest.mark.parametrize("accel_type", ACCEL_TYPES)
@pytest.mark.parametrize("units", [32, 64])
@pytest.mark.parametrize("use_bias", [True, False])
@pytest.mark.parametrize("activation_function", ["RELU", "NONE"])
def test_tflite_fully_connected(
    accel_type,
    units,
    use_bias,
    activation_function,
):
    """Compare TVM against TFLite for a fully connected layer.

    The layer is expressed as ``tf.matmul`` with a constant weight matrix, as
    suggested in review: TFLite turns that into a fully connected operator
    under the conditions used here. The traced function now takes the input
    tensor, and the input shapes are passed as a list, matching the other
    ``_compare_tvm_with_tflite`` callers in this file.
    """
    ifm_shape = (1, units)

    @tf.function
    def fully_connected(x):
        w = tf.constant(
            np.random.uniform(size=[units, units]),
            dtype=tf.float32,
        )
        x = tf.matmul(x, w)
        if use_bias:
            bias = tf.constant(np.random.uniform(size=[units]), dtype=tf.float32)
            x = tf.nn.bias_add(x, bias)
        if activation_function == "RELU":
            x = tf.nn.relu(x)
        return x

    _compare_tvm_with_tflite(fully_connected, [ifm_shape], accel_type)


# Run the tests in this file through pytest when it is executed as a script.
if __name__ == "__main__":
    pytest.main([__file__])
82 changes: 82 additions & 0 deletions tests/python/contrib/test_ethosu/test_legalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2346,5 +2346,87 @@ def verify(ext_func):
verify(mod["tvmgen_default_ethos_u_main_0"])


@pytest.mark.parametrize("units", [32, 64])
@pytest.mark.parametrize("use_bias", [True, False])
@pytest.mark.parametrize("activation_function", ["RELU", "NONE"])
def test_tflite_fully_connected(
    units,
    use_bias,
    activation_function,
):
    """Check that a TFLite fully connected layer legalizes to ethosu_conv2d.

    The layer is built with ``tf.matmul`` on a ``(1, units)`` input so that it
    satisfies ``FullyConnectedParams.is_valid`` (all IFM dimensions except the
    last must be 1). The checks follow the review of this test: IFM shape
    ``(1, 1, 1, c)``, weights shape ``(o, 1, 1, c)``, and a 1x1 kernel and
    dilation.
    """
    dtype = "int8"
    ifm_shape = (1, units)

    def create_tflite_graph():
        class Model(tf.Module):
            @tf.function
            def fully_connected(self, x):
                w = tf.constant(
                    np.random.uniform(size=[units, units]),
                    dtype=tf.float32,
                )
                x = tf.matmul(x, w)
                if use_bias:
                    bias = tf.constant(np.random.uniform(size=[units]), dtype=tf.float32)
                    x = tf.nn.bias_add(x, bias)
                if activation_function == "RELU":
                    x = tf.nn.relu(x)
                return x

        model = Model()
        concrete_func = model.fully_connected.get_concrete_function(
            tf.TensorSpec(ifm_shape, dtype=tf.float32)
        )

        # Convert the model
        def representative_dataset():
            for _ in range(100):
                data = np.random.rand(*ifm_shape)
                yield [data.astype(np.float32)]

        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.representative_dataset = representative_dataset
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        tflite_model = converter.convert()
        return tflite_model

    def verify(ext_func):
        # The IFM and OFM are 2D, so the rewriter wraps the conv2d in a
        # reshape on both sides; the conv2d is the argument of the trailing
        # reshape.
        op = ext_func.body.args[0]
        ofm_channels = op.attrs.ofm_channels
        assert ofm_channels == units

        # check IFM
        ifm = op.args[0].checked_type
        assert list(ifm.shape) == [1, 1, 1, units]
        assert str(ifm.dtype) == dtype

        # check weights
        weights_ohwi = op.args[1].data.asnumpy()
        assert str(weights_ohwi.dtype) == dtype
        assert list(weights_ohwi.shape) == [units, 1, 1, units]

        # Check that scale_bias matches weight tensor
        assert list(op.args[2].checked_type.shape)[0] == ofm_channels

        assert list(op.attrs.kernel_shape) == [1, 1]
        assert list(op.attrs.dilation) == [1, 1]

        if activation_function == "RELU":
            assert str(op.attrs.activation) == "CLIP"

    fc_pattern_table = [
        (
            ethosu.FullyConnectedParams.composite_name,
            ethosu.qnn_fc_pattern(),
            lambda pat: ethosu.FullyConnectedParams(pat).is_valid(),
        )
    ]

    tflite_graph = create_tflite_graph()
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)

    mod, params = relay.frontend.from_tflite(
        tflite_model,
        shape_dict={"input": ifm_shape},
        dtype_dict={"input": dtype},
    )

    mod["main"] = bind_params_by_name(mod["main"], params)
    mod = partition_ethosu_by_table(mod, fc_pattern_table)

    mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite(
        legalize.FullyConnectedRewriter(), mod["tvmgen_default_ethos_u_main_0"]
    )
    verify(mod["tvmgen_default_ethos_u_main_0"])


# Run the tests in this file through pytest when it is executed as a script.
if __name__ == "__main__":
    pytest.main([__file__])