Skip to content

Commit

Permalink
warp size fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
SlyEcho committed Jun 6, 2023
1 parent 33091a9 commit 5d6eb72
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,11 @@ typedef struct {
} block_q6_k;
// Compile-time layout check: block_q6_k must occupy exactly one ggml_fp16_t
// plus 13*QK_K/16 bytes. Any other size (e.g. from unexpected padding) fails the build.
static_assert(sizeof(block_q6_k) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_k block size/padding");

// WARP_SIZE: lane count used for warp-level operations.
// Builds without GGML_USE_HIPBLAS use the fixed value 32. Builds with it
// defined expand to `warpSize` instead (presumably the HIP built-in, which
// need not equal 32 and may not be a compile-time constant -- confirm).
#ifndef GGML_USE_HIPBLAS
#define WARP_SIZE 32
#else
#define WARP_SIZE warpSize
#endif // GGML_USE_HIPBLAS

#define CUDA_MUL_BLOCK_SIZE 256

Expand Down Expand Up @@ -679,8 +683,8 @@ static __global__ void dequantize_mul_mat_vec(const void * vx, const float * y,
// sum up partial sums and write back result
__syncthreads();
#pragma unroll
for (int mask = 16; mask > 0; mask >>= 1) {
tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32);
for (int mask = WARP_SIZE/2; mask > 0; mask >>= 1) {
tmp += __shfl_xor_sync(0xffffffff, tmp, mask, WARP_SIZE);
}

if (tid == 0) {
Expand Down

0 comments on commit 5d6eb72

Please sign in to comment.