@@ -12472,6 +12472,11 @@ static void ggml_compute_forward_sum_rows_f32(
1247212472 float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
1247312473 float row_sum = 0;
1247412474 ggml_vec_sum_f32(ne00, &row_sum, src_row);
12475+ if (!isfinite(row_sum)) {
12476+ fprintf(stderr, "Oops(%s, %s): found %g for i1 = %d, i2 = %d, i3 = %d. ne00 = %d\n", __func__, dst->name,
12477+ (double)row_sum, (int)i1, (int)i2, (int)i3, (int)ne00);
12478+ exit(1);
12479+ }
1247512480 dst_row[0] = row_sum;
1247612481 }
1247712482 }
@@ -14759,6 +14764,18 @@ static void ggml_compute_forward_mul_mat_id(
1475914764
1476014765#define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne12 + (i1)]
1476114766
14767+ GGML_ASSERT(ids->ne[1] == dst->ne[2]);
14768+ for (int64_t iid1 = ith; iid1 < ids->ne[1]; iid1 += nth) {
14769+ for (int id = 0; id < n_ids; ++id) {
14770+ const int32_t i02 = *(const int32_t *) ((const char *) ids->data + iid1*ids->nb[1] + id*ids->nb[0]);
14771+ if (i02 < 0 || i02 >= n_as) {
14772+ // This is needed for SER. If fewer experts have been activated for this row, we need to
14773+ // clear it, else there could be garbage that leads to NaNs later on.
14774+ memset((char *)dst->data + id*dst->nb[1] + iid1*dst->nb[2], 0, dst->ne[0]*sizeof(float));
14775+ }
14776+ }
14777+ }
14778+
1476214779 if (ith == 0) {
1476314780 // initialize matrix_row_counts
1476414781 memset(matrix_row_counts, 0, n_as*sizeof(int64_t));
@@ -15012,6 +15029,18 @@ static void ggml_compute_forward_mul_mat_id_up_gate(
1501215029
1501315030#define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne12 + (i1)]
1501415031
15032+ GGML_ASSERT(ids->ne[1] == dst->ne[2]);
15033+ for (int64_t iid1 = ith; iid1 < ids->ne[1]; iid1 += nth) {
15034+ for (int id = 0; id < n_ids; ++id) {
15035+ const int32_t i02 = *(const int32_t *) ((const char *) ids->data + iid1*ids->nb[1] + id*ids->nb[0]);
15036+ if (i02 < 0 || i02 >= n_as) {
15037+ // This is needed for SER. If fewer experts have been activated for this row, we need to
15038+ // clear it, else there could be garbage that leads to NaNs later on.
15039+ memset((char *)dst->data + id*dst->nb[1] + iid1*dst->nb[2], 0, dst->ne[0]*sizeof(float));
15040+ }
15041+ }
15042+ }
15043+
1501515044 if (ith == 0) {
1501615045 // initialize matrix_row_counts
1501715046 memset(matrix_row_counts, 0, n_as*sizeof(int64_t));
@@ -15916,7 +15945,7 @@ static void ggml_compute_forward_get_rows_f16(
1591615945 (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
1591715946 (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
1591815947 } else {
15919- memset((char *) src0 ->data + i01*nb01 + i11*nb02 + i12*nb03 , 0, nc*sizeof(float));
15948+ memset((char *) dst ->data + i10*nb1 + i11*nb2 + i12*nb3 , 0, nc*sizeof(float));
1592015949 }
1592115950
1592215951 }
@@ -15960,7 +15989,7 @@ static void ggml_compute_forward_get_rows_bf16(
1596015989 (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
1596115990 (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
1596215991 } else {
15963- memset((char *) src0 ->data + i01*nb01 + i11*nb02 + i12*nb03 , 0, nc*sizeof(float));
15992+ memset((char *) dst ->data + i10*nb1 + i11*nb2 + i12*nb3 , 0, nc*sizeof(float));
1596415993 }
1596515994 }
1596615995}
0 commit comments