Skip to content

Commit d766c6d

Browse files
author
xiaying
committed
Fix bug for compute MatMul E=1 for multi-thread
1 parent 7ccdbed commit d766c6d

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

source/backend/cpu/x86_x64/avx/CommonOptFunction.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -747,7 +747,7 @@ void _AVX_MNNComputeMatMulForE_1(const float* A, const float* B, float* C, const
747 747
}
748 748
_mm256_storeu_ps(C + 8 * y, sumValue);
749 749
}
750 -
for (int y=hR; y<h; y+=numberThread) {
750 +
for (int y = hR + tId; y<h; y+=numberThread) {
751 751
auto bs = B + y;
752 752
float sumValue = 0.0f;
753 753
if (biasPtr != nullptr) {
@@ -802,7 +802,7 @@ void _AVX_MNNComputeMatMulForE_1FMA(const float* A, const float* B, float* C, co
802 802
}
803 803
_mm256_storeu_ps(C + 8 * y, sumValue);
804 804
}
805 -
for (int y=hR; y<h; y+=numberThread) {
805 +
for (int y= hR + tId; y<h; y+=numberThread) {
806 806
auto bs = B + y;
807 807
float sumValue = 0.0f;
808 808
if (biasPtr != nullptr) {

source/backend/cpu/x86_x64/sse/CommonOptFunction.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -670,7 +670,7 @@ void _SSE_MNNComputeMatMulForE_1(const float* A, const float* B, float* C, const
670 670
}
671 671
_mm_storeu_ps(C + 4 * y, sumValue);
672 672
}
673 -
for (int y=hR; y<h; y+=numberThread) {
673 +
for (int y=hR + tId; y<h; y+=numberThread) {
674 674
auto bs = B + y;
675 675
float sumValue = 0.0f;
676 676
if (biasPtr != nullptr) {

0 commit comments

Comments
 (0)