ggml-org
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
Lines changed: 13 additions & 0 deletions
diff --git a/ggml/src/ggml-cuda/convert.cuh b/ggml/src/ggml-cuda/convert.cuh
Lines changed: 1 addition & 0 deletions
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
Lines changed: 352 additions & 1 deletion
@@ -1005,3 +1005,16 @@ struct ggml_backend_cuda_context {
         return pool(device);
     }
 };
+
+struct ggml_cuda_mm_fusion_args_host { // host-side argument bundle holding ggml_tensor pointers; presumably describes a fused mat-mul epilogue — confirm against callers
+    const ggml_tensor * x_bias = nullptr; // null by default; presumably "no bias" when null — TODO confirm
+    const ggml_tensor * gate = nullptr; // null by default; presumably the gate operand of the GLU — TODO confirm
+    const ggml_tensor * gate_bias = nullptr; // null by default
+    ggml_glu_op glu_op = {}; // value-initialized (enumerator value 0) so a default-constructed instance never holds an indeterminate enum
+};
+struct ggml_cuda_mm_fusion_args_device { // same fields as the host variant but as untyped const void pointers; presumably raw device data passed to kernels — confirm against callers
+    const void * x_bias = nullptr; // null by default
+    const void * gate = nullptr; // null by default
+    const void * gate_bias = nullptr; // null by default
+    ggml_glu_op glu_op = {}; // value-initialized (enumerator value 0) so copying a default-constructed instance never reads an indeterminate enum
+};
@@ -1,3 +1,4 @@
+#pragma once
 #include "common.cuh"
 
 #define CUDA_DEQUANTIZE_BLOCK_SIZE 256
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+#pragma once`
`1`	`2`	`#include "common.cuh"`
`2`	`3`
`3`	`4`	`#define CUDA_DEQUANTIZE_BLOCK_SIZE 256`