@@ -343,9 +343,9 @@ static void vec_dot_q4x4x2_q8x4x2(const int n, float * restrict s, const void *
343343    }
344344
345345    // Reduce and convert into fp32 
346-     r0_sum  =  hvx_vec_qf32_reduce_sum ( r0_sum );
346+     r0_sum  =  hvx_vec_fp32_reduce_sum ( Q6_Vsf_equals_Vqf32 ( r0_sum ) );
347347
348-     hvx_vec_store_u (& s [0 ], 4 , Q6_Vsf_equals_Vqf32 ( r0_sum ) );
348+     hvx_vec_store_u (& s [0 ], 4 , r0_sum );
349349}
350350
351351static  void  vec_dot_q4x4x2_q8x4x2_rx2 (const  int  n ,
@@ -516,9 +516,9 @@ static void vec_dot_q8x4x2_q8x4x2(const int n, float * restrict s, const void *
516516    }
517517
518518    // Reduce and convert into fp32 
519-     r0_sum  =  hvx_vec_qf32_reduce_sum ( r0_sum );
519+     r0_sum  =  hvx_vec_fp32_reduce_sum ( Q6_Vsf_equals_Vqf32 ( r0_sum ) );
520520
521-     hvx_vec_store_u (& s [0 ], 4 , Q6_Vsf_equals_Vqf32 ( r0_sum ) );
521+     hvx_vec_store_u (& s [0 ], 4 , r0_sum );
522522}
523523
524524static  void  vec_dot_q8x4x2_q8x4x2_rx2 (const  int  n ,
@@ -722,9 +722,9 @@ static void vec_dot_mxfp4x4x2_q8x4x2(const int n,
722722    }
723723
724724    // Reduce and convert into fp32 
725-     r0_sum  =  hvx_vec_qf32_reduce_sum ( r0_sum );
725+     r0_sum  =  hvx_vec_fp32_reduce_sum ( Q6_Vsf_equals_Vqf32 ( r0_sum ) );
726726
727-     hvx_vec_store_u (& s [0 ], 4 , Q6_Vsf_equals_Vqf32 ( r0_sum ) );
727+     hvx_vec_store_u (& s [0 ], 4 , r0_sum );
728728}
729729
730730static  void  vec_dot_mxfp4x4x2_q8x4x2_rx2 (const  int  n ,
0 commit comments