
Commit 4dc9cac
Author: ibsidorenko

Removed QDenseAttrs and QConv2DAttrs

1 parent 71212d7

File tree: 13 files changed, +79 −250 lines


include/tvm/relay/qnn/attrs.h

Lines changed: 0 additions & 99 deletions
@@ -25,7 +25,6 @@
 #define TVM_RELAY_QNN_ATTRS_H_
 
 #include <tvm/ir/attrs.h>
-#include <tvm/relay/base.h>
 
 #include <string>
 
@@ -126,104 +125,6 @@ struct BroadcastAttrs : public tvm::AttrsNode<BroadcastAttrs> {
   }
 };
 
-/*! \brief Attributes used in QNN convolution operator */
-struct QConv2DAttrs : public tvm::AttrsNode<QConv2DAttrs> {
-  Array<IndexExpr> strides;
-  Array<IndexExpr> padding;
-  Array<IndexExpr> dilation;
-  int groups;
-  IndexExpr channels;
-  Array<IndexExpr> kernel_size;
-  tvm::String data_layout;
-  tvm::String kernel_layout;
-  tvm::String out_layout;
-  tvm::String auto_scheduler_rewritten_layout;   // The layout after auto-scheduler's layout rewrite
-  Array<PrimExpr> meta_schedule_original_shape;  // The original shape of the weights
-  DataType out_dtype;
-
-  // Optional extra attributes for Hexagon target. Describes requantization parameters.
-  // Note, It is not set up explicitly through qnn._make.conv2d.
-  int axis;
-  DataType rq_out_dtype;
-
-  TVM_DECLARE_ATTRS(QConv2DAttrs, "relay.attrs.QConv2DAttrs") {
-    TVM_ATTR_FIELD(strides)
-        .set_default(Array<IndexExpr>({1, 1}))
-        .describe("Specifies the strides of the convolution.");
-    TVM_ATTR_FIELD(padding)
-        .set_default(Array<IndexExpr>({0, 0}))
-        .describe(
-            "If padding is non-zero, then the input is implicitly zero-padded"
-            "Padding support both symmetric and asymmetric as"
-            "one int : same padding used on all sides"
-            "two int : bottom, right will use same padding as top, left"
-            "four int : padding width in the order of (top, left, bottom, right)");
-    TVM_ATTR_FIELD(dilation)
-        .set_default(Array<IndexExpr>({1, 1}))
-        .describe("Specifies the dilation rate to use for dilated convolution.");
-    TVM_ATTR_FIELD(groups).set_default(1).describe(
-        "Controls the connections between inputs and outputs."
-        "At groups=1, all inputs are convolved to all outputs."
-        "At groups=2, the operation becomes equivalent to having two convolution"
-        "layers side by side, each seeing half the input channels, and producing"
-        "half the output channels, and both subsequently concatenated.");
-    TVM_ATTR_FIELD(channels)
-        .describe(
-            "The number of output channels in the convolution."
-            " If it is not set, inferred by shape of the weight.")
-        .set_default(NullValue<IndexExpr>());
-    TVM_ATTR_FIELD(kernel_size)
-        .describe("Specifies the dimensions of the convolution window.")
-        .set_default(NullValue<Array<IndexExpr>>());
-    TVM_ATTR_FIELD(data_layout)
-        .set_default("NCHW")
-        .describe(
-            "Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
-            "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
-            "dimensions respectively. Convolution is applied on the 'H' and"
-            "'W' dimensions.");
-    TVM_ATTR_FIELD(kernel_layout)
-        .set_default("OIHW")
-        .describe(
-            "Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
-            "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
-            "dimensions respectively.");
-    TVM_ATTR_FIELD(out_layout)
-        .set_default("")
-        .describe(
-            "Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
-            "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
-            "dimensions respectively. Default to be same as input layout.");
-
-    // use 0 bits to indicate none.
-    TVM_ATTR_FIELD(out_dtype)
-        .set_default(NullValue<DataType>())
-        .describe("Output data type, set to explicit type under mixed precision setting");
-  }
-};
-
-/*! \brief Attributes for QNN dense operator */
-struct QDenseAttrs : public tvm::AttrsNode<QDenseAttrs> {
-  IndexExpr units;
-  tvm::String auto_scheduler_rewritten_layout;   // The layout after auto-scheduler's layout rewrite
-  Array<PrimExpr> meta_schedule_original_shape;  // The original shape of the weights
-  DataType out_dtype;
-
-  // Optional extra attributes for Hexagon target. Describes requantization parameters.
-  // Note, It is not set up explicitly through qnn._make.dense.
-  int axis;
-  DataType rq_out_dtype;
-
-  TVM_DECLARE_ATTRS(QDenseAttrs, "relay.attrs.QDenseAttrs") {
-    TVM_ATTR_FIELD(units).describe("Number of hidden units of the dense transformation.");
-
-    // use 0 bits to indicate none.
-    TVM_ATTR_FIELD(out_dtype)
-        .set_default(NullValue<DataType>())
-        .describe("Output data type, set to explicit type under mixed precision setting");
-  }
-};
-
 }  // namespace qnn
 }  // namespace relay
 }  // namespace tvm
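Note: with the dedicated QNN attribute structs removed, qnn.conv2d and qnn.dense calls now carry the standard relay.attrs.Conv2DAttrs and DenseAttrs nodes. A minimal Python sketch of the observable effect; the variable names and shapes are illustrative, not taken from this commit:

import tvm
from tvm import relay

data = relay.var("data", shape=(1, 3, 32, 32), dtype="int8")
weight = relay.var("weight", shape=(8, 3, 3, 3), dtype="int8")
zp = relay.const(0, "int32")         # zero point (illustrative)
scale = relay.const(1.0, "float32")  # scale (illustrative)

# Build a QNN conv2d call; after this commit its attrs node is the
# ordinary Conv2DAttrs rather than the removed QConv2DAttrs.
call = relay.qnn.op.conv2d(
    data, weight, zp, zp, scale, scale,
    kernel_size=(3, 3), channels=8,
)
print(type(call.attrs))  # expected: relay.attrs.Conv2DAttrs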

python/tvm/relay/backend/te_compiler.py

Lines changed: 18 additions & 15 deletions
@@ -281,25 +281,28 @@ def get_shape(shape):
 
 
 @tvm._ffi.register_func("relay.backend.lower_call")
-def lower_call(call, inputs, target):
+def lower_call(call, inputs, target, otype=None):
     """Lower the call expression to op implementation and tensor outputs."""
     assert isinstance(call.op, tvm.ir.Op)
     op = call.op
 
-    # Prepare the call_node->checked_type(). For the call node inputs, we ensure that
-    # the shape is Int32. Following code ensures the same for the output as well.
-    # TODO(@icemelon9): Support recursive tuple
-    ret_type = call.checked_type
-    if isinstance(ret_type, _ty.TensorType):
-        ret_type = _ty.TensorType(get_shape(ret_type.shape), ret_type.dtype)
-    elif isinstance(ret_type, _ty.TupleType):
-        new_fields = []
-        for field in ret_type.fields:
-            if isinstance(field, _ty.TensorType):
-                new_fields.append(_ty.TensorType(get_shape(field.shape), field.dtype))
-            else:
-                new_fields.append(field)
-        ret_type = _ty.TupleType(new_fields)
+    if otype is not None:
+        ret_type = otype
+    else:
+        # Prepare the call_node->checked_type(). For the call node inputs, we ensure that
+        # the shape is Int32. Following code ensures the same for the output as well.
+        # TODO(@icemelon9): Support recursive tuple
+        ret_type = call.checked_type
+        if isinstance(ret_type, _ty.TensorType):
+            ret_type = _ty.TensorType(get_shape(ret_type.shape), ret_type.dtype)
+        elif isinstance(ret_type, _ty.TupleType):
+            new_fields = []
+            for field in ret_type.fields:
+                if isinstance(field, _ty.TensorType):
+                    new_fields.append(_ty.TensorType(get_shape(field.shape), field.dtype))
+                else:
+                    new_fields.append(field)
+            ret_type = _ty.TupleType(new_fields)
 
     is_dyn = _ty.is_dynamic(call.checked_type)
     for arg in call.args:
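Note: the new optional otype argument lets a caller supply the return type explicitly instead of deriving it from call.checked_type, presumably so fused QNN ops can be lowered with their requantized output type. A hypothetical usage sketch; call, inputs, and target are assumed to be in scope as in the surrounding code:

# Override the inferred return type with an explicit TensorType
# (shape and dtype here are illustrative, not from this commit).
override = _ty.TensorType((1, 64, 56, 56), "int8")
lowered = lower_call(call, inputs, target, otype=override)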

python/tvm/relay/op/op_attrs.py

Lines changed: 0 additions & 10 deletions
@@ -529,16 +529,6 @@ class RequantizeAttrs(Attrs):
     """Attributes used in requantize operators"""
 
 
-@tvm._ffi.register_object("relay.attrs.QConv2DAttrs")
-class QConv2DAttrs(Attrs):
-    """Attributes used in QNN conv2d operators"""
-
-
-@tvm._ffi.register_object("relay.attrs.QDenseAttrs")
-class QDenseAttrs(Attrs):
-    """Attributes used in QNN dense operators"""
-
-
 @tvm._ffi.register_object("relay.attrs.ScatterAttrs")
 class ScatterAttrs(Attrs):
     """Attributes used in scatter operators"""

python/tvm/relay/qnn/strategy/generic.py

Lines changed: 5 additions & 16 deletions
@@ -16,14 +16,9 @@
 # under the License.
 """Definition of generic operator strategy."""
 
-from tvm import _ffi
 from tvm.target import override_native_generic_func
 
 
-GET_RQ_OUT_DTYPE = _ffi.get_global_func("relay.attrs.get_rq_out_dtype")
-GET_RQ_AXIS = _ffi.get_global_func("relay.attrs.get_rq_axis")
-
-
 def wrap_topi_schedule(topi_schedule):
     """Wrap TOPI schedule which doesn't use attrs"""
 
@@ -69,14 +64,13 @@ def wrap_topi_qnn_conv2d(topi_compute):
     """Wrap TOPI compute which use conv2d attrs and output data type"""
 
     def wrapper(attrs, inputs, out_type):
-        out_dtype = GET_RQ_OUT_DTYPE(attrs)
-        axis = GET_RQ_AXIS(attrs)
+        out_dtype = out_type.dtype
         oshape = out_type.shape
         strides = attrs.strides
         padding = attrs.padding
         dilation = attrs.dilation
         if len([*inputs]) == 11:
-            args = [*inputs, axis, strides, padding, dilation, oshape, out_dtype]
+            args = [*inputs, strides, padding, dilation, oshape, out_dtype]
         elif len([*inputs]) == 10:
             args = [  # QNN Conv2d params:
                 inputs[0],
@@ -92,7 +86,6 @@ def wrapper(attrs, inputs, out_type):
                 inputs[7],
                 inputs[8],
                 inputs[9],
-                axis,
                 # Conv2d attrs:
                 strides,
                 padding,
@@ -111,7 +104,6 @@ def wrapper(attrs, inputs, out_type):
                 None,
                 None,
                 None,
-                axis,
                 strides,
                 padding,
                 dilation,
@@ -126,11 +118,10 @@ def wrapper(attrs, inputs, out_type):
 def wrap_topi_qnn_dense(topi_compute):
     """Wrap TOPI compute which use qnn.dense attrs"""
 
-    def wrapper(attrs, inputs, _out_type):
-        out_dtype = GET_RQ_OUT_DTYPE(attrs)
-        axis = GET_RQ_AXIS(attrs)
+    def wrapper(_attrs, inputs, out_type):
+        out_dtype = out_type.dtype
         if len([*inputs]) == 11:
-            args = [*inputs, axis, out_dtype]
+            args = [*inputs, out_dtype]
         elif len([*inputs]) == 10:
             args = [  # QNN Dense params:
                 inputs[0],
@@ -146,7 +137,6 @@ def wrapper(attrs, inputs, _out_type):
                 inputs[7],
                 inputs[8],
                 inputs[9],
-                axis,
                 out_dtype,
             ]
         else:
@@ -160,7 +150,6 @@ def wrapper(attrs, inputs, _out_type):
                 None,
                 None,
                 None,
-                axis,
                 out_dtype,
             ]
         return [topi_compute(*args)]
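Note: the net effect of these strategy changes is that the requantize output dtype now comes from the checked output type handed to the wrapper, not from the removed FFI hooks. A condensed sketch of the resulting wrapper contract, simplified to the 11-input case (the _sketch name is hypothetical):

def wrap_topi_qnn_dense_sketch(topi_compute):
    def wrapper(_attrs, inputs, out_type):
        # dtype of the fused dense + requantize output, e.g. "int8"
        out_dtype = out_type.dtype
        return [topi_compute(*inputs, out_dtype)]
    return wrapper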

python/tvm/topi/hexagon/qnn.py

Lines changed: 23 additions & 10 deletions
@@ -151,12 +151,8 @@ def qnn_requantize(data, input_scale, input_zp, output_scale, output_zp, axis, o
     def _compute(*indices):
         value = data(*indices)
 
-        # Account scalar and 1D quantization parameters:
-        iscale_idx = tvm.tir.indexmod(indices[axis], topi.shape(input_scale)[0])
-        iscale = input_scale if len(input_scale.shape) == 0 else input_scale[iscale_idx]
-
-        oscale_idx = tvm.tir.indexmod(indices[axis], topi.shape(output_scale)[0])
-        oscale = output_scale if len(output_scale.shape) == 0 else output_scale[oscale_idx]
+        iscale = get_qnn_param(input_scale, indices, axis)
+        oscale = get_qnn_param(output_scale, indices, axis)
 
         sub = te.subtract(value, input_zp)
         mul = te.div(iscale, oscale)
@@ -334,7 +330,6 @@ def qnn_conv2d(  # Conv2d inputs
     rq_input_zero_point,
     rq_output_scale,
     rq_output_zero_point,
-    axis,
     # Conv2d attributes:
     strides,
     padding,
@@ -402,6 +397,13 @@ def qnn_conv2d(  # Conv2d inputs
     # Requantize output of convolution
     # Q_output = zp_output + round((scale_input)/(scale_output) * (Q_input - zp_input))
     if rq_input_scale is not None and rq_output_scale is not None:
+        # Now supported only scalar and 1D quantization parameters
+        assert len(rq_input_scale.shape) == 0 or len(rq_input_scale.shape) == 1
+        assert len(rq_output_scale.shape) == 0 or len(rq_output_scale.shape) == 1
+        axis = -1
+        if len(rq_input_scale.shape) == 1 or len(rq_output_scale.shape) == 1:
+            axis = 1  # Axis param should correspond to 'C' dimension.
+
         return qnn_requantize(
             out,
             rq_input_scale,
@@ -447,7 +449,6 @@ def qnn_depthwise_conv2d(  # Conv2d inputs
     rq_input_zero_point,
     rq_output_scale,
     rq_output_zero_point,
-    axis,
     # Conv2d attributes:
     strides,
     padding,
@@ -510,6 +511,13 @@ def qnn_depthwise_conv2d(  # Conv2d inputs
     # Requantize output of convolution
     # Q_output = zp_output + round((scale_input)/(scale_output) * (Q_input - zp_input))
     if rq_input_scale is not None and rq_output_scale is not None:
+        # Now supported only scalar and 1D quantization parameters
+        assert len(rq_input_scale.shape) == 0 or len(rq_input_scale.shape) == 1
+        assert len(rq_output_scale.shape) == 0 or len(rq_output_scale.shape) == 1
+        axis = -1
+        if len(rq_input_scale.shape) == 1 or len(rq_output_scale.shape) == 1:
+            axis = 1  # Axis param should correspond to 'C' dimension.
+
         return qnn_requantize(
             out,
             rq_input_scale,
@@ -555,15 +563,13 @@ def qnn_dense(
     rq_input_zero_point,
     rq_output_scale,
     rq_output_zero_point,
-    axis,
     out_dtype,
 ):
     """Compute for qnn.dense
 
     Note! This is POC code. There was no goal to implement high performance compute function.
 
     """
-
     M, K = get_const_tuple(data.shape)
     N, _ = get_const_tuple(weight.shape)
     k = te.reduce_axis((0, K), "k")
@@ -587,6 +593,13 @@ def qnn_dense(
     # Requantize output of dense
     # Q_output = zp_output + round((scale_input)/(scale_output) * (Q_input - zp_input))
     if rq_input_scale is not None and rq_output_scale is not None:
+        # Now supported only scalar and 1D quantization parameters
+        assert len(rq_input_scale.shape) == 0 or len(rq_input_scale.shape) == 1
+        assert len(rq_output_scale.shape) == 0 or len(rq_output_scale.shape) == 1
+        axis = -1
+        if len(rq_input_scale.shape) == 1 or len(rq_output_scale.shape) == 1:
+            axis = 1  # Axis param should correspond to 'N' dimension.
+
         return qnn_requantize(
             out,
             rq_input_scale,
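Note: the requantize hunk above calls a get_qnn_param helper whose definition lies outside this diff excerpt. A plausible sketch, reconstructed from the inline code it replaces and assuming this module's existing tvm/topi imports:

def get_qnn_param(param, indices, axis):
    # Scalar quantization parameter: applies to every element.
    if len(param.shape) == 0:
        return param
    # 1-D (per-channel) parameter: index it along the quantization axis.
    param_idx = tvm.tir.indexmod(indices[axis], topi.shape(param)[0])
    return param[param_idx]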

src/relay/backend/contrib/cmsisnn/convolutions.cc

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ namespace relay {
 namespace contrib {
 namespace cmsisnn {
 
-bool IsCMSISNNDepthwise(const qnn::QConv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
+bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
                         const Array<PrimExpr>& kernel_shape) {
   std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
   int kernel_pos_o = kernel_layout.find("O");

src/relay/backend/contrib/cmsisnn/convolutions.h

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ namespace cmsisnn {
 * attributes
 */
 
-bool IsCMSISNNDepthwise(const qnn::QConv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
+bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
                         const Array<PrimExpr>& kernel_shape);
 
 }  // namespace cmsisnn

src/relay/backend/contrib/cmsisnn/generate_constants.cc

Lines changed: 2 additions & 3 deletions
@@ -50,8 +50,7 @@ class GenerateConstantsMutator : public MixedModeMutator {
 
  private:
   /*! * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
-  Expr ConvertKernelLayout(Expr kernel_expr, const qnn::QConv2DAttrs* conv2d_attrs,
-                           Attrs* new_attrs) {
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
     auto attrs = make_object<Conv2DAttrs>();
     attrs->strides = std::move(conv2d_attrs->strides);
     attrs->padding = std::move(conv2d_attrs->padding);
@@ -107,7 +106,7 @@ class GenerateConstantsMutator : public MixedModeMutator {
       conv2d_call = requantize_input;
     }
 
-    auto* conv2d_attrs = conv2d_call->attrs.as<qnn::QConv2DAttrs>();
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
     tvm::Attrs new_conv2d_attrs = conv2d_call->attrs;
     Expr conv2d_kernel = conv2d_call->args[1];
src/relay/backend/contrib/cmsisnn/relay_to_tir.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ class RelayToTIRVisitor : public MixedModeMutator {
163163
// https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
164164

165165
// prepare cmsis_nn_conv_params
166-
const qnn::QConv2DAttrs* conv2d_attrs = conv2d_call->attrs.as<qnn::QConv2DAttrs>();
166+
const Conv2DAttrs* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
167167
int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
168168
int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
169169
int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
@@ -310,7 +310,7 @@ class RelayToTIRVisitor : public MixedModeMutator {
310310
// https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
311311

312312
// prepare cmsis_nn_fc_params
313-
const qnn::QDenseAttrs* dense_attrs = fc_call->attrs.as<qnn::QDenseAttrs>();
313+
const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
314314
int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
315315
int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
316316
int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
