@@ -6309,14 +6309,14 @@ static void dequantize_row_q3_K_cuda(const void * vx, dst_t * y, const int k, cu
63096309}
63106310
63116311template <typename dst_t >
6312- static void dequantize_q4_0_cuda (const void * vx, dst_t * y, const int k, cudaStream_t stream) {
6312+ static void dequantize_row_q4_0_cuda (const void * vx, dst_t * y, const int k, cudaStream_t stream) {
63136313 const int nb32 = k / 32 ;
63146314 const int nb = (k + 255 ) / 256 ;
63156315 dequantize_block_q4_0<<<nb, 32 , 0 , stream>>> (vx, y, nb32);
63166316}
63176317
63186318template <typename dst_t >
6319- static void dequantize_q4_1_cuda (const void * vx, dst_t * y, const int k, cudaStream_t stream) {
6319+ static void dequantize_row_q4_1_cuda (const void * vx, dst_t * y, const int k, cudaStream_t stream) {
63206320 const int nb32 = k / 32 ;
63216321 const int nb = (k + 255 ) / 256 ;
63226322 dequantize_block_q4_1<<<nb, 32 , 0 , stream>>> (vx, y, nb32);
@@ -6370,9 +6370,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
63706370 int id;
63716371 switch (type) {
63726372 case GGML_TYPE_Q4_0:
6373- return dequantize_q4_0_cuda ;
6373+ return dequantize_row_q4_0_cuda ;
63746374 case GGML_TYPE_Q4_1:
6375- return dequantize_q4_1_cuda ;
6375+ return dequantize_row_q4_1_cuda ;
63766376 case GGML_TYPE_Q5_0:
63776377 return dequantize_block_cuda<QK5_0, QR5_0, dequantize_q5_0>;
63786378 case GGML_TYPE_Q5_1:
@@ -6407,9 +6407,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
64076407static to_fp32_cuda_t ggml_get_to_fp32_cuda (ggml_type type) {
64086408 switch (type) {
64096409 case GGML_TYPE_Q4_0:
6410- return dequantize_q4_0_cuda ;
6410+ return dequantize_row_q4_0_cuda ;
64116411 case GGML_TYPE_Q4_1:
6412- return dequantize_q4_1_cuda ;
6412+ return dequantize_row_q4_1_cuda ;
64136413 case GGML_TYPE_Q5_0:
64146414 return dequantize_block_cuda<QK5_0, QR5_0, dequantize_q5_0>;
64156415 case GGML_TYPE_Q5_1:
0 commit comments