@@ -255,7 +255,7 @@ int Softmax_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
255
255
float16x8_t _ss01 = vpaddq_f16 (_p0, _p1);
256
256
float16x8_t _ss23 = vpaddq_f16 (_p2, _p3);
257
257
float16x8_t _ss2 = vpaddq_f16 (_ss01, _ss23);
258
- _sum = vadd_f16 (_sum, vpmax_f16 (vget_low_f16 (_ss2), vget_high_f16 (_ss2)));
258
+ _sum = vadd_f16 (_sum, vpadd_f16 (vget_low_f16 (_ss2), vget_high_f16 (_ss2)));
259
259
vst1_f16 (sumptr, _sum);
260
260
ptr += 32 ;
261
261
maxptr += 4 ;
@@ -292,7 +292,7 @@ int Softmax_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
292
292
vst1q_f16 (ptr, _p0);
293
293
vst1q_f16 (ptr + 8 , _p1);
294
294
float16x8_t _ss2 = vpaddq_f16 (_p0, _p1);
295
- _sum = vadd_f16 (_sum, vpmax_f16 (vget_low_f16 (_ss2), vget_high_f16 (_ss2)));
295
+ _sum = vadd_f16 (_sum, vpadd_f16 (vget_low_f16 (_ss2), vget_high_f16 (_ss2)));
296
296
vst1_f16 (sumptr, _sum);
297
297
ptr += 16 ;
298
298
maxptr += 4 ;
@@ -743,7 +743,7 @@ int Softmax_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
743
743
float16x8_t _ss01 = vpaddq_f16 (_p0, _p1);
744
744
float16x8_t _ss23 = vpaddq_f16 (_p2, _p3);
745
745
float16x8_t _ss2 = vpaddq_f16 (_ss01, _ss23);
746
- _sum = vadd_f16 (_sum, vpmax_f16 (vget_low_f16 (_ss2), vget_high_f16 (_ss2)));
746
+ _sum = vadd_f16 (_sum, vpadd_f16 (vget_low_f16 (_ss2), vget_high_f16 (_ss2)));
747
747
vst1_f16 (sumptr, _sum);
748
748
ptr += 32 ;
749
749
sumptr += 4 ;
@@ -768,7 +768,7 @@ int Softmax_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
768
768
float16x8_t _p1 = vld1q_f16 (ptr + 8 );
769
769
float16x4_t _sum = vld1_f16 (sumptr);
770
770
float16x8_t _ss2 = vpaddq_f16 (_p0, _p1);
771
- _sum = vadd_f16 (_sum, vpmax_f16 (vget_low_f16 (_ss2), vget_high_f16 (_ss2)));
771
+ _sum = vadd_f16 (_sum, vpadd_f16 (vget_low_f16 (_ss2), vget_high_f16 (_ss2)));
772
772
vst1_f16 (sumptr, _sum);
773
773
ptr += 16 ;
774
774
sumptr += 4 ;
0 commit comments