We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6c7139f, commit a0c67c7 (Copy full SHA for a0c67c7)
ggml/src/ggml-cuda/mmvf.cu
@@ -147,7 +147,10 @@ static __global__ void mul_mat_vec_f(
147
const nv_bfloat162 * gate_x2 = has_gate ? (const nv_bfloat162 *) gate_x : nullptr;
148
for (int col2 = tid; col2 < ncols2; col2 += block_size) {
149
const nv_bfloat162 tmpx = x2[col2];
150
- const nv_bfloat162 tmpx_gate = has_gate ? gate_x2[col2] : make_bfloat162(0.0f, 0.0f);
+ nv_bfloat162 tmpx_gate;
151
+ if constexpr (has_gate) {
152
+ tmpx_gate = gate_x2[col2];
153
+ }
154
#pragma unroll
155
for (int j = 0; j < ncols_dst; ++j) {
156
const float2 tmpy = y2[j*stride_col_y2 + col2];
0 commit comments