We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 985d659 · commit 8f09eb2 · Copy full SHA for 8f09eb2
lite/backends/arm/math/type_trans.cc
@@ -57,9 +57,9 @@ void fp32_to_int8(const float* din,
57
#ifdef __aarch64__
58
float32x4_t vmax = vdupq_n_f32(-127.0);
59
asm volatile(
60
+ "0: \n" /* main loop */
61
"ldp q0, q1, [%[in]], #32 \n"
62
"ldp q2, q3, [%[in]], #32 \n"
- "0: \n" /* main loop */
63
"fmul v4.4s, v0.4s, %[scale].4s \n"
64
"fmul v5.4s, v1.4s, %[scale].4s \n"
65
"fmul v6.4s, v2.4s, %[scale].4s \n"
@@ -74,14 +74,12 @@ void fp32_to_int8(const float* din,
74
"bif v5.16b, %[vmax].16b, v9.16b \n"
75
"bif v6.16b, %[vmax].16b, v10.16b \n"
76
"bif v7.16b, %[vmax].16b, v11.16b \n"
77
- "ldp q0, q1, [%[in]], #32 \n"
78
"subs %[cnt], %[cnt], #1 \n"
79
/* fp32 - int32 */
80
"FCVTAS v8.4s, v4.4s \n"
81
"FCVTAS v9.4s, v5.4s \n"
82
"FCVTAS v10.4s, v6.4s \n"
83
"FCVTAS v11.4s, v7.4s \n"
84
- "ldp q2, q3, [%[in]], #32 \n"
85
"sqxtn v4.4h, v8.4s \n"
86
"sqxtn2 v4.8h, v9.4s \n"
87
"sqxtn v5.4h, v10.4s \n"
0 commit comments