Skip to content

Commit f77c13b

Browse files
authored
CUDA: General GEMV fusion (#16715)
1 parent 3cfa9c3 commit f77c13b

File tree

11 files changed

+1096
-166
lines changed

11 files changed

+1096
-166
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,3 +1005,16 @@ struct ggml_backend_cuda_context {
10051005
return pool(device);
10061006
}
10071007
};
1008+
1009+
struct ggml_cuda_mm_fusion_args_host {
1010+
const ggml_tensor * x_bias = nullptr;
1011+
const ggml_tensor * gate = nullptr;
1012+
const ggml_tensor * gate_bias = nullptr;
1013+
ggml_glu_op glu_op;
1014+
};
1015+
struct ggml_cuda_mm_fusion_args_device {
1016+
const void * x_bias = nullptr;
1017+
const void * gate = nullptr;
1018+
const void * gate_bias = nullptr;
1019+
ggml_glu_op glu_op;
1020+
};

ggml/src/ggml-cuda/convert.cuh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#pragma once
12
#include "common.cuh"
23

34
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 352 additions & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)