Skip to content

Commit 3ed3e7b

Browse files
committed
Reverted sequence mode for RWKV due to multiple speed-loss issues with bigger quantized models
1 parent f83b666 commit 3ed3e7b

File tree

4 files changed: 5 additions and 5 deletions

ggml.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@
194194
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
195195

196196
#define GGML_MAX_DIMS 4
197-
#define GGML_MAX_NODES 16384
197+
#define GGML_MAX_NODES 4096
198198
#define GGML_MAX_PARAMS 256
199199
#define GGML_MAX_CONTEXTS 64
200200
#define GGML_MAX_OPT 4

gpttype_adapter.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -479,7 +479,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
479479
}
480480
else
481481
{
482-
n_batch = 8; //use sequence mode to speedup
482+
n_batch = 1; //do not use sequence mode to speedup until it is fixed
483483

484484
//setup buffers for rwkv state
485485
auto padding = 512u;

koboldcpp.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,7 @@ def utfprint(str):
224224
maxlen = 256
225225
modelbusy = False
226226
defaultport = 5001
227-
KcppVersion = "1.30.2"
227+
KcppVersion = "1.30.3"
228228

229229
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
230230
sys_version = ""

otherarch/rwkv_v3.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -484,8 +484,8 @@ struct rwkv_ggml_context {
484484
return;
485485
}
486486

487-
const size_t memory_required_overhead = size_t(256) * 1024 * 1024;
488-
const size_t memory_required_overhead_sc = size_t(128) * 1024 * 1024;
487+
const size_t memory_required_overhead = size_t(128) * 1024 * 1024;
488+
const size_t memory_required_overhead_sc = size_t(64) * 1024 * 1024;
489489

490490
ctx = ggml_init({ size.objects_count * GGML_OBJECT_SIZE + size.objects_size + memory_required_overhead, NULL, false});
491491

0 commit comments

Comments
 (0)