Skip to content

Commit bee5645

Browse files
NicolaLancellotti and Ashutosh Parkhi committed
Fix type punning error
Co-authored-by: Ashutosh Parkhi <[email protected]>
1 parent 238fcd7 commit bee5645

File tree

1 file changed

+15
-9
lines changed

1 file changed

+15
-9
lines changed

python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/multi_channel_convolve.py

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -179,20 +179,26 @@ def _dual_int16_channel_convolve_impl(_tensor_h, tensor_w, channels, kernel_h, k
179179
extern "C"
180180
#endif
181181
int32_t {_get_func_name("int16", tensor_w, channels, kernel_h, kernel_w, suffix)}(
182-
uint32_t *out,
183-
uint32_t *tensor,
184-
uint32_t *kernel) {{
185-
186-
uint32_t sum_c0 = 0;
187-
uint32_t sum_c1 = 0;
182+
int32_t *out,
183+
int16_t *tensor,
184+
int16_t *kernel) {{
185+
186+
int32_t sum_c0 = 0;
187+
int32_t sum_c1 = 0;
188+
189+
int32_t kernel_i32[{kernel_h} * {kernel_w}];
190+
memcpy(kernel_i32, kernel, {kernel_h} * {kernel_w} * 4);
191+
192+
int32_t tensor_length = {((kernel_w - 1) * (channels // 2) + (kernel_h - 1) * tensor_w * (channels // 2)) + 1};
193+
int32_t tensor_i32[tensor_length];
194+
memcpy(tensor_i32, tensor, tensor_length * 4);
188195
189196
#pragma GCC unroll 3
190197
for (int i = 0; i < {kernel_h}; i++) {{
191198
#pragma GCC unroll 3
192199
for (int j = 0; j < {kernel_w}; j++) {{
193-
uint32_t tensor_c10 = *(tensor + j * {channels // 2}
194-
+ i * {tensor_w * (channels // 2)});
195-
uint32_t kernel_c10 = *kernel++;
200+
int32_t tensor_c10 = tensor_i32[j * {channels // 2} + i * {tensor_w * (channels // 2)}];
201+
int32_t kernel_c10 = kernel_i32[{kernel_w} * i + j];
196202
sum_c0 = __builtin_arm_smlabb(tensor_c10, kernel_c10, sum_c0);
197203
sum_c1 = __builtin_arm_smlatt(tensor_c10, kernel_c10, sum_c1);
198204
}}

0 commit comments

Comments
 (0)