Commit 3237b35 — "Formatting fixes"
1 parent: 218f5c9

File tree

3 files changed: +23 additions, −15 deletions

python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/tensordot.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def get_c_function_name(split_size, dimensions, offsets, x_strides):
3838
+ (f"_{x_strides[0]}_{x_strides[1]}" if split_size > 1 else "")
3939
)
4040

41+
4142
def _is_pow_2(number):
4243
"""Checks if `number` is a power of `2`."""
4344
return number & (number - 1) == 0 and number > 0
@@ -160,11 +161,11 @@ def _apply_simd_optimizations(instruction_tuples) -> Iterator[Tuple]:
160161
}
161162

162163

163-
#def _no_first_accumulate(instruction_tuples) -> Iterator[Tuple]:
164-
# ins, op1, op2 = next(instruction_tuples)
165-
# yield NO_ACC_PREFIX_CONVERSIONS[ins], op1, op2
166-
# for instruction_tuple in instruction_tuples:
167-
# yield instruction_tuple
164+
# def _no_first_accumulate(instruction_tuples) -> Iterator[Tuple]:
165+
# ins, op1, op2 = next(instruction_tuples)
166+
# yield NO_ACC_PREFIX_CONVERSIONS[ins], op1, op2
167+
# for instruction_tuple in instruction_tuples:
168+
# yield instruction_tuple
168169

169170

170171
def _expand_instruction_tuples(instruction_tuples, index) -> Iterator[str]:
@@ -205,6 +206,7 @@ def _expand_instruction_tuples(instruction_tuples, index) -> Iterator[str]:
205206
else:
206207
yield f'asm ("{instruction} %0, %1, %2" : "=r" (sum_{index}) : "r" ({op1}), "r" ({op2}));'
207208

209+
208210
def _requantize_sums(num_sums) -> Iterator[str]:
209211
"""Simulates multiplying by the float32 requantization scale by doing a int64 multiply + shift,
210212
which is much faster. The bias is added at the beginning, so we can skip doing it now. The shift

tests/python/relay/strategy/arm_cpu/test_quantized_convolution.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ def _get_quant_zp_const(quantization_dict, as_scalar = False):
154154
def test_qnn_conv2d_mobilenetv1_layer(layer, interpreter):
155155
in_dtype = "int8"
156156
schedule_name, dtype, padding, strides = _get_layer_attributes(layer)
157-
158157
"""Load the input, kernel, bias, and generated output from each layer when it was run by the
159158
TensorFlow TFLite interpreter. The tensor values are quantized (though note that biases_tensor
160159
is an int32), while the quantization data is not. Note the zero points are zero everywhere
@@ -163,10 +162,13 @@ def test_qnn_conv2d_mobilenetv1_layer(layer, interpreter):
163162
def lookup(detail):
164163
return interpreter.get_tensor(detail["index"]), detail["quantization_parameters"]
165164
inputs_tensor, inputs_quant = lookup(_get_main_path_tensor_details(tensor_details, layer))
165+
print(inputs_tensor.shape)
166166
kernel_tensor, kernel_quant = lookup(_get_kernel_details(tensor_details, layer))
167+
print(kernel_tensor.shape)
167168
biases_tensor, biases_quant = lookup(_get_bias_details(tensor_details, layer))
169+
print(biases_tensor.shape)
168170
output_tensor, output_quant = lookup(_get_main_path_tensor_details(tensor_details, layer + 1))
169-
171+
out_channel_multiplier, kernel_h, kernel_w, in_channels = kernel_tensor.shape
170172

171173
# Reshape tensors to match the layouts we will see after legalization
172174
if layer % 2 == 0: # Regular conv2d
@@ -187,15 +189,15 @@ def lookup(detail):
187189
kernel_zero_point=_get_quant_zp_const(kernel_quant),
188190
input_scale=_get_quant_scale_const(inputs_quant, as_scalar=True),
189191
kernel_scale=_get_quant_scale_const(kernel_quant),
190-
kernel_size=(3, 3),
192+
kernel_size=(kernel_h, kernel_w),
191193
data_layout=new_inputs_layout,
192194
kernel_layout=new_kernel_layout,
193195

194196
dilation=(1, 1),
195197
strides=strides,
196198
padding=padding,
197-
groups=(1 if layer % 2 == 0 else 3),
198-
channels=8,
199+
groups=(1 if layer % 2 == 0 else in_channels),
200+
channels=(out_channel_multiplier if layer % 2 == 0 else in_channels),
199201
out_dtype="int32",
200202
)
201203

@@ -219,7 +221,7 @@ def lookup(detail):
219221
test_model = AOTTestModel(
220222
module=tvm.IRModule.from_expr(test_function),
221223
inputs={"input": inputs_ndarr},
222-
outputs={"Identity": output_ndarr},
224+
outputs={"output": output_ndarr},
223225
)
224226

225227
compile_and_run(
@@ -229,7 +231,7 @@ def lookup(detail):
229231
use_unpacked_api=True,
230232
target_opts={
231233
"-keys": "arm_cpu",
232-
"-mcpu": "cortex-m4",
234+
"-mcpu": "cortex-m7",
233235
},
234236
schedule_name=schedule_name,
235237
verbose=True,

tests/python/topi/python/test_topi_conv2d_tensordot_opts.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,13 @@
2929

3030

3131
def test_write_3x3_depthwise_code():
32-
"""
32+
"""This is the function that would be generated for a 1x4x48x48 NCHW input tensor with "SAME"
33+
padding. We are only computing one sum at once, so we don't need stride or output. Note that
34+
this is pretty inefficient - it would be much better to compute a few sums concurrently.
35+
36+
When inlined, this code compiles (with armv7-a clang 11) into:
3337
38+
tensordot_opt_x1_int16_w48_3x3_000(int*, int*, int*, int*, int*):
3439
ldr.w lr, [r3]
3540
ldrd r11, r4, [r1]
3641
ldrd r5, r9, [r1, #96]
@@ -225,7 +230,6 @@ def test_1x1x8_convolution_code():
225230
)
226231

227232

228-
229233
def test_3x3x3_offset_convolution_code():
230234
"""This is the function that would be generated for a 1x96x96x3 NHWC input tensor under
231235
standard convolution with a 3x3x3 kernel - the first layer of MobileNetV1. This is special, as
@@ -304,4 +308,4 @@ def test_3x3x3_offset_convolution_code():
304308
return 0;
305309
}
306310
"""
307-
)
311+
)

Comments (0)