From d766c5a62e53776133ef2145fb05ca815b9f33d1 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 13 Apr 2024 13:53:59 +0200 Subject: [PATCH 01/13] imatrix: save the dataset file used in the output file --- examples/imatrix/imatrix.cpp | 74 ++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 73609d3e6ae00..18ea8016ff104 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -23,6 +23,7 @@ struct Stats { }; struct StatParams { + std::string dataset; std::string ofile = "imatrix.dat"; int n_output_frequency = 10; int verbosity = 1; @@ -46,7 +47,7 @@ class IMatrixCollector { std::vector m_src1_data; std::vector m_ids; // the expert ids from ggml_mul_mat_id // - void save_imatrix(const char * file_name) const; + void save_imatrix(const char * file_name, const char * dataset) const; void keep_imatrix(int ncall) const; }; @@ -184,7 +185,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * } void IMatrixCollector::save_imatrix() const { - save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str()); + save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str(), m_params.dataset.c_str()); } void IMatrixCollector::keep_imatrix(int ncall) const { @@ -192,24 +193,30 @@ void IMatrixCollector::keep_imatrix(int ncall) const { if (file_name.empty()) file_name = "imatrix.dat"; file_name += ".at_"; file_name += std::to_string(ncall); - save_imatrix(file_name.c_str()); + save_imatrix(file_name.c_str(), m_params.dataset.c_str()); } -void IMatrixCollector::save_imatrix(const char * fname) const { +void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) const { std::ofstream out(fname, std::ios::binary); int n_entries = m_stats.size(); - out.write((const char*)&n_entries, sizeof(n_entries)); - for (auto& p : m_stats) { + out.write((const char *) &n_entries, sizeof(n_entries)); + for (const auto & p : m_stats) { int len = p.first.size(); - out.write((const char*)&len, sizeof(len)); + out.write((const char *) &len, sizeof(len)); out.write(p.first.c_str(), len); - out.write((const char*)&p.second.ncall, sizeof(p.second.ncall)); + out.write((const char *) &p.second.ncall, sizeof(p.second.ncall)); int nval = p.second.values.size(); - out.write((const char*)&nval, sizeof(nval)); - if (nval > 0) out.write((const char*)p.second.values.data(), nval*sizeof(float)); + out.write((const char *) &nval, sizeof(nval)); + if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float)); } + + // Write the dataset name at the end of the file to later on specify it in quantize + int n_dataset = strlen(dataset); + out.write((const char *) &n_dataset, sizeof(n_dataset)); + out.write(dataset, n_dataset); + if (m_params.verbosity > 0) { - fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n",__func__,m_last_call,fname); + fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname); } } @@ -532,6 +539,29 @@ int main(int argc, char ** argv) { } } + gpt_params params; + params.n_batch = 512; + if (!gpt_params_parse(args.size(), args.data(), params)) { + return 1; + } + + params.logits_all = true; + params.n_batch = std::min(params.n_batch, params.n_ctx); + + print_build_info(); + + if (params.seed == LLAMA_DEFAULT_SEED) { + params.seed = time(NULL); + } + + fprintf(stderr, "%s: seed = %u\n", __func__, params.seed); + + 
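+    // the RNG below is only consumed by gpt_random_prompt() when --random-prompt is set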
std::mt19937 rng(params.seed); + if (params.random_prompt) { + params.prompt = gpt_random_prompt(rng); + } + + sparams.dataset = params.prompt_file; g_collector.set_parameters(std::move(sparams)); if (!combine_files.empty()) { @@ -570,28 +600,6 @@ int main(int argc, char ** argv) { } } - gpt_params params; - params.n_batch = 512; - if (!gpt_params_parse(args.size(), args.data(), params)) { - return 1; - } - - params.logits_all = true; - params.n_batch = std::min(params.n_batch, params.n_ctx); - - print_build_info(); - - if (params.seed == LLAMA_DEFAULT_SEED) { - params.seed = time(NULL); - } - - fprintf(stderr, "%s: seed = %u\n", __func__, params.seed); - - std::mt19937 rng(params.seed); - if (params.random_prompt) { - params.prompt = gpt_random_prompt(rng); - } - llama_backend_init(); llama_numa_init(params.numa); From 01e779593004fb767b4578c46d30954f111d4368 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 13 Apr 2024 14:49:23 +0200 Subject: [PATCH 02/13] llama: support kv overrides type string string --- llama.cpp | 6 ++++++ llama.h | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/llama.cpp b/llama.cpp index b93c1abcd85d6..29caae419c19e 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2801,6 +2801,7 @@ namespace GGUFMeta { case LLAMA_KV_OVERRIDE_TYPE_BOOL: return "bool"; case LLAMA_KV_OVERRIDE_TYPE_INT: return "int"; case LLAMA_KV_OVERRIDE_TYPE_FLOAT: return "float"; + case LLAMA_KV_OVERRIDE_TYPE_STR: return "str"; } return "unknown"; } @@ -2820,6 +2821,9 @@ namespace GGUFMeta { case LLAMA_KV_OVERRIDE_TYPE_FLOAT: { LLAMA_LOG_INFO("%.6f\n", ovrd->float_value); } break; + case LLAMA_KV_OVERRIDE_TYPE_STR: { + LLAMA_LOG_INFO("%s\n", ovrd->str_value); + } break; default: // Shouldn't be possible to end up here, but just in case... 
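                    // (every llama_model_kv_override_type value has a case above; a new tag must be handled there too)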
throw std::runtime_error( @@ -13698,6 +13702,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s gguf_set_val_i32(ctx_out, o.key, o.int_value); } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_BOOL) { gguf_set_val_bool(ctx_out, o.key, o.bool_value); + } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_STR) { + gguf_set_val_str(ctx_out, o.key, o.str_value); } else { LLAMA_LOG_WARN("%s: unknown KV override type for key %s\n", __func__, o.key); } diff --git a/llama.h b/llama.h index b5da686f7b7e5..89428501141e3 100644 --- a/llama.h +++ b/llama.h @@ -195,6 +195,7 @@ extern "C" { LLAMA_KV_OVERRIDE_TYPE_INT, LLAMA_KV_OVERRIDE_TYPE_FLOAT, LLAMA_KV_OVERRIDE_TYPE_BOOL, + LLAMA_KV_OVERRIDE_TYPE_STR, }; struct llama_model_kv_override { @@ -202,8 +203,9 @@ extern "C" { enum llama_model_kv_override_type tag; union { int64_t int_value; - double float_value; - bool bool_value; + double float_value; + bool bool_value; + char * str_value = nullptr; }; }; From a9202fb155de9a5939f62588c16698be920cd88c Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 13 Apr 2024 14:49:48 +0200 Subject: [PATCH 03/13] common: factorize KV Overrides parsing between common and server --- common/common.cpp | 82 ++++++++++++++++++++------------------ common/common.h | 2 + examples/server/server.cpp | 36 +---------------- 3 files changed, 48 insertions(+), 72 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index dda514785171b..ca10ae098ac38 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -157,6 +157,48 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { return result; } +bool parse_kv_override(const char * data, std::vector & overrides) { + const char* sep = strchr(data, '='); + if (sep == nullptr || sep - data >= 128) { + fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data); + return false; + } + llama_model_kv_override kvo; + std::strncpy(kvo.key, data, sep - data); + kvo.key[sep - data] = 0; + sep++; + if (strncmp(sep, "int:", 4) == 0) { + sep += 4; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; + kvo.int_value = std::atol(sep); + } else if (strncmp(sep, "float:", 6) == 0) { + sep += 6; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT; + kvo.float_value = std::atof(sep); + } else if (strncmp(sep, "bool:", 5) == 0) { + sep += 5; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL; + if (std::strcmp(sep, "true") == 0) { + kvo.bool_value = true; + } else if (std::strcmp(sep, "false") == 0) { + kvo.bool_value = false; + } else { + fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data); + return false; + } + + } else if (strncmp(sep, "str:", 4) == 0) { + sep += 4; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; + kvo.str_value = strdup(sep); + } else { + fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data); + return false; + } + overrides.emplace_back(std::move(kvo)); + return true; +} + bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param) { llama_sampling_params& sparams = params.sparams; @@ -1153,47 +1195,11 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa invalid_param = true; return true; } - char* sep = strchr(argv[i], '='); - if (sep == nullptr || sep - argv[i] >= 128) { - fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]); - invalid_param = true; - return true; - } - struct llama_model_kv_override kvo; - std::strncpy(kvo.key, argv[i], sep - argv[i]); - kvo.key[sep - argv[i]] = 0; - sep++; - if 
(strncmp(sep, "int:", 4) == 0) { - sep += 4; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; - kvo.int_value = std::atol(sep); - } - else if (strncmp(sep, "float:", 6) == 0) { - sep += 6; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT; - kvo.float_value = std::atof(sep); - } - else if (strncmp(sep, "bool:", 5) == 0) { - sep += 5; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL; - if (std::strcmp(sep, "true") == 0) { - kvo.bool_value = true; - } - else if (std::strcmp(sep, "false") == 0) { - kvo.bool_value = false; - } - else { - fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]); - invalid_param = true; - return true; - } - } - else { + if (!parse_kv_override(argv[i], params.kv_overrides)) { fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]); invalid_param = true; return true; } - params.kv_overrides.push_back(kvo); return true; } #ifndef LOG_DISABLE_LOGS @@ -1461,7 +1467,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" path to dynamic lookup cache to use for lookup decoding (updated by generation)\n"); printf(" --override-kv KEY=TYPE:VALUE\n"); printf(" advanced option to override model metadata by key. may be specified multiple times.\n"); - printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n"); + printf(" types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n"); printf(" -ptc N, --print-token-count N\n"); printf(" print token count every N tokens (default: %d)\n", params.n_print); printf("\n"); diff --git a/common/common.h b/common/common.h index 65272b0baaa41..aa3c06a68d556 100644 --- a/common/common.h +++ b/common/common.h @@ -169,6 +169,8 @@ struct gpt_params { std::string image = ""; // path to an image file }; +bool parse_kv_override(const char * data, std::vector & overrides); + bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params); bool gpt_params_parse(int argc, char ** argv, gpt_params & params); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 634e653ada284..64f4da94d9908 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2372,7 +2372,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co printf(" -n, --n-predict maximum tokens to predict (default: %d)\n", params.n_predict); printf(" --override-kv KEY=TYPE:VALUE\n"); printf(" advanced option to override model metadata by key. may be specified multiple times.\n"); - printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n"); + printf(" types: int, float, bool, str. 
example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n"); printf(" -gan N, --grp-attn-n N set the group attention factor to extend context size through self-extend(default: 1=disabled), used together with group attention width `--grp-attn-w`\n"); printf(" -gaw N, --grp-attn-w N set the group attention width to extend context size through self-extend(default: 512), used together with group attention factor `--grp-attn-n`\n"); printf(" --chat-template JINJA_TEMPLATE\n"); @@ -2803,43 +2803,11 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams, invalid_param = true; break; } - char * sep = strchr(argv[i], '='); - if (sep == nullptr || sep - argv[i] >= 128) { - fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]); - invalid_param = true; - break; - } - - struct llama_model_kv_override kvo; - std::strncpy(kvo.key, argv[i], sep - argv[i]); - kvo.key[sep - argv[i]] = 0; - sep++; - if (strncmp(sep, "int:", 4) == 0) { - sep += 4; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; - kvo.int_value = std::atol(sep); - } else if (strncmp(sep, "float:", 6) == 0) { - sep += 6; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT; - kvo.float_value = std::atof(sep); - } else if (strncmp(sep, "bool:", 5) == 0) { - sep += 5; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL; - if (std::strcmp(sep, "true") == 0) { - kvo.bool_value = true; - } else if (std::strcmp(sep, "false") == 0) { - kvo.bool_value = false; - } else { - fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]); - invalid_param = true; - break; - } - } else { + if (!parse_kv_override(argv[i], params.kv_overrides)) { fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]); invalid_param = true; break; } - params.kv_overrides.push_back(kvo); } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); server_print_usage(argv[0], default_params, default_sparams); From 262c95ab6349e82af50584e6019c4a9d3424cad9 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 13 Apr 2024 14:50:32 +0200 Subject: [PATCH 04/13] quantize: add imatrix n entries and dataset KV metadata quantize: factorize KV Overrides parsing between common #6656 --- Makefile | 2 +- examples/quantize/CMakeLists.txt | 2 +- examples/quantize/quantize.cpp | 74 ++++++++++++++------------------ 3 files changed, 35 insertions(+), 43 deletions(-) diff --git a/Makefile b/Makefile index 7a69ad1b3c14f..61e2e005a0590 100644 --- a/Makefile +++ b/Makefile @@ -760,7 +760,7 @@ batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml. 
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -quantize: examples/quantize/quantize.cpp build-info.o ggml.o llama.o $(OBJS) +quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) diff --git a/examples/quantize/CMakeLists.txt b/examples/quantize/CMakeLists.txt index 6f374a2bd3b46..6b977fde86ab2 100644 --- a/examples/quantize/CMakeLists.txt +++ b/examples/quantize/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET quantize) add_executable(${TARGET} quantize.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE llama build_info ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT}) target_include_directories(${TARGET} PRIVATE ../../common) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 64cb6db19d004..7d5e1d5bb332a 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -8,7 +8,6 @@ #include #include #include -#include struct quant_option { std::string name; @@ -53,6 +52,8 @@ static const std::vector QUANT_OPTIONS = { { "COPY", LLAMA_FTYPE_ALL_F32, "only copy tensors, no quantizing", }, }; +static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET = "quantize.imatrix.dataset"; +static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.n_entries"; static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) { std::string ftype_str; @@ -112,7 +113,7 @@ static void usage(const char * executable) { exit(1); } -static void load_imatrix(const std::string & imatrix_file, std::unordered_map> & imatrix_data) { +static void load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map> & imatrix_data) { std::ifstream in(imatrix_file.c_str(), std::ios::binary); if (!in) { printf("%s: failed to open %s\n",__func__, imatrix_file.c_str()); @@ -159,15 +160,27 @@ static void load_imatrix(const std::string & imatrix_file, std::unordered_map dataset_as_vec(dataset_len+1); + in.read((char *)dataset_as_vec.data(), dataset_len); + dataset_as_vec[dataset_len] = 0; + imatrix_dataset = std::string{dataset_as_vec.data()}; + printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str()); + } printf("%s: loaded %d importance matrix entries from %s\n", __func__, int(imatrix_data.size()), imatrix_file.c_str()); } static void prepare_imatrix(const std::string & imatrix_file, + std::string & imatrix_dataset, const std::vector & included_weights, const std::vector & excluded_weights, std::unordered_map> & imatrix_data) { if (!imatrix_file.empty()) { - load_imatrix(imatrix_file, imatrix_data); + load_imatrix(imatrix_file, imatrix_dataset, imatrix_data); } if (imatrix_data.empty()) { return; @@ -210,43 +223,6 @@ static ggml_type parse_ggml_type(const char * arg) { return result; } -static bool parse_kv_override(const char * data, std::vector & overrides) { - const char* sep = strchr(data, '='); - if (sep == nullptr || sep - data >= 128) { - fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data); - return false; - } - llama_model_kv_override kvo; - std::strncpy(kvo.key, data, sep - data); - kvo.key[sep - data] = 0; - sep++; - if (strncmp(sep, "int:", 4) == 0) { - 
sep += 4; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; - kvo.int_value = std::atol(sep); - } else if (strncmp(sep, "float:", 6) == 0) { - sep += 6; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT; - kvo.float_value = std::atof(sep); - } else if (strncmp(sep, "bool:", 5) == 0) { - sep += 5; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL; - if (std::strcmp(sep, "true") == 0) { - kvo.bool_value = true; - } else if (std::strcmp(sep, "false") == 0) { - kvo.bool_value = false; - } else { - fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data); - return false; - } - } else { - fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data); - return false; - } - overrides.emplace_back(std::move(kvo)); - return true; -} - int main(int argc, char ** argv) { if (argc < 3) { usage(argv[0]); @@ -313,10 +289,26 @@ int main(int argc, char ** argv) { usage(argv[0]); } + std::string imatrix_dataset; std::unordered_map> imatrix_data; - prepare_imatrix(imatrix_file, included_weights, excluded_weights, imatrix_data); + prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data); if (!imatrix_data.empty()) { params.imatrix = &imatrix_data; + if (!imatrix_dataset.empty()) { + llama_model_kv_override kvo; + std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET); + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; + kvo.str_value = strdup(imatrix_dataset.c_str()); + kv_overrides.emplace_back(std::move(kvo)); + } + + { + llama_model_kv_override kvo; + std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES); + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; + kvo.int_value = imatrix_data.size(); + kv_overrides.emplace_back(std::move(kvo)); + } } if (!kv_overrides.empty()) { kv_overrides.emplace_back(); From cbc43aa411c7ca7064612838cc6711ee1381fbb9 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 13 Apr 2024 15:12:03 +0200 Subject: [PATCH 05/13] llama: remove kv override str_value initialization as it does not compile on some toolchain --- llama.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.h b/llama.h index 89428501141e3..0cc0d3f291428 100644 --- a/llama.h +++ b/llama.h @@ -205,7 +205,7 @@ extern "C" { int64_t int_value; double float_value; bool bool_value; - char * str_value = nullptr; + char * str_value; }; }; From 851de160dd3b4b3f48eac94e914aaa861eaa2ddd Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 13 Apr 2024 18:03:10 +0200 Subject: [PATCH 06/13] quantize: add imatrix m_last_call as `quantize.imatrix.chunks_count` --- examples/imatrix/imatrix.cpp | 3 +++ examples/quantize/quantize.cpp | 28 +++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 18ea8016ff104..b2d81311536a8 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -210,6 +210,9 @@ void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) co if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float)); } + // Write the number of call the matrix was computed with + out.write((const char *) &m_last_call, sizeof(m_last_call)); + // Write the dataset name at the end of the file to later on specify it in quantize int n_dataset = strlen(dataset); out.write((const char *) &n_dataset, sizeof(n_dataset)); diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 7d5e1d5bb332a..dcc1fec39347c 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -53,7 
+53,8 @@ static const std::vector QUANT_OPTIONS = { }; static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET = "quantize.imatrix.dataset"; -static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.n_entries"; +static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.entries_count"; +static const char * const LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS = "quantize.imatrix.chunks_count"; static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) { std::string ftype_str; @@ -113,7 +114,7 @@ static void usage(const char * executable) { exit(1); } -static void load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map> & imatrix_data) { +static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map> & imatrix_data) { std::ifstream in(imatrix_file.c_str(), std::ios::binary); if (!in) { printf("%s: failed to open %s\n",__func__, imatrix_file.c_str()); @@ -162,7 +163,9 @@ static void load_imatrix(const std::string & imatrix_file, std::string & imatrix } // latest imatrix version contains the dataset filename at the end of the file + int m_last_call = 0; if (in.peek() != EOF) { + in.read((char *)&m_last_call, sizeof(m_last_call)); int dataset_len; in.read((char *)&dataset_len, sizeof(dataset_len)); std::vector dataset_as_vec(dataset_len+1); @@ -171,19 +174,21 @@ static void load_imatrix(const std::string & imatrix_file, std::string & imatrix imatrix_dataset = std::string{dataset_as_vec.data()}; printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str()); } - printf("%s: loaded %d importance matrix entries from %s\n", __func__, int(imatrix_data.size()), imatrix_file.c_str()); + printf("%s: loaded %d importance matrix entries from %s computed on %d chunks\n", __func__, int(imatrix_data.size()), imatrix_file.c_str(), m_last_call); + return m_last_call; } -static void prepare_imatrix(const std::string & imatrix_file, +static int prepare_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, const std::vector & included_weights, const std::vector & excluded_weights, std::unordered_map> & imatrix_data) { + int m_last_call = -1; if (!imatrix_file.empty()) { - load_imatrix(imatrix_file, imatrix_dataset, imatrix_data); + m_last_call = load_imatrix(imatrix_file, imatrix_dataset, imatrix_data); } if (imatrix_data.empty()) { - return; + return m_last_call; } if (!excluded_weights.empty()) { for (auto& name : excluded_weights) { @@ -209,6 +214,7 @@ static void prepare_imatrix(const std::string & imatrix_file, if (!imatrix_data.empty()) { printf("%s: have %d importance matrix entries\n", __func__, int(imatrix_data.size())); } + return m_last_call; } static ggml_type parse_ggml_type(const char * arg) { @@ -291,7 +297,7 @@ int main(int argc, char ** argv) { std::string imatrix_dataset; std::unordered_map> imatrix_data; - prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data); + int m_last_call = prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data); if (!imatrix_data.empty()) { params.imatrix = &imatrix_data; if (!imatrix_dataset.empty()) { @@ -309,6 +315,14 @@ int main(int argc, char ** argv) { kvo.int_value = imatrix_data.size(); kv_overrides.emplace_back(std::move(kvo)); } + + if (m_last_call > 0) { + llama_model_kv_override kvo; + std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS); + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; 
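+            // m_last_call is the chunk count read back from the imatrix file trailer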
+ kvo.int_value = m_last_call; + kv_overrides.emplace_back(std::move(kvo)); + } } if (!kv_overrides.empty()) { kv_overrides.emplace_back(); From 0d82da6f79b3031ad5c0250d2d3af5df1d7b31a3 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 13 Apr 2024 19:06:50 +0200 Subject: [PATCH 07/13] quantize: add imatrix filename in KV --- examples/quantize/quantize.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index dcc1fec39347c..a0d9a2f46f6f4 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -52,6 +52,7 @@ static const std::vector QUANT_OPTIONS = { { "COPY", LLAMA_FTYPE_ALL_F32, "only copy tensors, no quantizing", }, }; +static const char * const LLM_KV_QUANTIZE_IMATRIX_FILE = "quantize.imatrix.file"; static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET = "quantize.imatrix.dataset"; static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.entries_count"; static const char * const LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS = "quantize.imatrix.chunks_count"; @@ -300,6 +301,13 @@ int main(int argc, char ** argv) { int m_last_call = prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data); if (!imatrix_data.empty()) { params.imatrix = &imatrix_data; + { + llama_model_kv_override kvo; + std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_FILE); + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; + kvo.str_value = strdup(imatrix_file.c_str()); + kv_overrides.emplace_back(std::move(kvo)); + } if (!imatrix_dataset.empty()) { llama_model_kv_override kvo; std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET); From 82e4187f95f969b90b9e27c660cb5168c3e33ec7 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Fri, 19 Apr 2024 13:16:42 +0200 Subject: [PATCH 08/13] llama: add llama_model_kv_override_free --- common/common.cpp | 1 - llama.cpp | 18 ++++++++++++++++++ llama.h | 3 +++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index 29fa5cdbca6b4..9838a538fc62a 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -263,7 +263,6 @@ bool parse_kv_override(const char * data, std::vector & fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data); return false; } - } else if (strncmp(sep, "str:", 4) == 0) { sep += 4; kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; diff --git a/llama.cpp b/llama.cpp index 8bf1fdbc6602b..2907283f4e82c 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2929,6 +2929,16 @@ namespace GGUFMeta { return false; } + template + static typename std::enable_if::value, char *>::type + try_override(T & target, const struct llama_model_kv_override * ovrd) { + if (validate_override(LLAMA_KV_OVERRIDE_TYPE_STR, ovrd)) { + target = ovrd->str_value; + return true; + } + return false; + } + template static typename std::enable_if::value, bool>::type try_override(T & target, const struct llama_model_kv_override * ovrd) { @@ -14977,6 +14987,14 @@ void llama_free_model(struct llama_model * model) { delete model; } +void llama_model_kv_override_free(struct llama_model_kv_override * kv_overrides) { + for (const struct llama_model_kv_override *p = kv_overrides; p->key[0] != 0; p++) { + if (p->tag == LLAMA_KV_OVERRIDE_TYPE_STR) { + delete p->str_value; + } + } +} + struct llama_context * llama_new_context_with_model( struct llama_model * model, struct llama_context_params params) { diff --git a/llama.h b/llama.h index 0cc0d3f291428..0f215f336c2e6 100644 --- a/llama.h +++ b/llama.h @@ -209,6 
+209,9 @@ extern "C" { }; }; + // Frees all allocated memory + LLAMA_API void llama_model_kv_override_free(struct llama_model_kv_override * ctx); + struct llama_model_params { int32_t n_gpu_layers; // number of layers to store in VRAM enum llama_split_mode split_mode; // how to split the model across multiple GPUs From aa0e28f8fcc540e4c01b71ff65761551518edc27 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 20 Apr 2024 10:17:03 +0200 Subject: [PATCH 09/13] common: add llama_model_kv_override_free common: free kv override if used after model loading --- common/common.cpp | 12 ++++++++++++ common/common.h | 3 +++ llama.cpp | 8 -------- llama.h | 3 --- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 9838a538fc62a..c30b9e59fecd4 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -2232,6 +2232,10 @@ std::tuple llama_init_from_gpt_par return std::make_tuple(nullptr, nullptr); } + if (!params.kv_overrides.empty()) { + llama_model_kv_override_free(params.kv_overrides.data()); + } + auto cparams = llama_context_params_from_gpt_params(params); llama_context * lctx = llama_new_context_with_model(model, cparams); @@ -2952,3 +2956,11 @@ llama_control_vector_data llama_control_vector_load(const std::vectorkey[0] != 0; p++) { + if (p->tag == LLAMA_KV_OVERRIDE_TYPE_STR) { + delete p->str_value; + } + } +} diff --git a/common/common.h b/common/common.h index 562d3a1195f65..916023e2504a9 100644 --- a/common/common.h +++ b/common/common.h @@ -172,6 +172,9 @@ struct gpt_params { bool parse_kv_override(const char * data, std::vector & overrides); +// Frees all allocated memory +void llama_model_kv_override_free(struct llama_model_kv_override * ctx); + bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params); bool gpt_params_parse(int argc, char ** argv, gpt_params & params); diff --git a/llama.cpp b/llama.cpp index 2907283f4e82c..9d9f7b4e12628 100644 --- a/llama.cpp +++ b/llama.cpp @@ -14987,14 +14987,6 @@ void llama_free_model(struct llama_model * model) { delete model; } -void llama_model_kv_override_free(struct llama_model_kv_override * kv_overrides) { - for (const struct llama_model_kv_override *p = kv_overrides; p->key[0] != 0; p++) { - if (p->tag == LLAMA_KV_OVERRIDE_TYPE_STR) { - delete p->str_value; - } - } -} - struct llama_context * llama_new_context_with_model( struct llama_model * model, struct llama_context_params params) { diff --git a/llama.h b/llama.h index 0f215f336c2e6..0cc0d3f291428 100644 --- a/llama.h +++ b/llama.h @@ -209,9 +209,6 @@ extern "C" { }; }; - // Frees all allocated memory - LLAMA_API void llama_model_kv_override_free(struct llama_model_kv_override * ctx); - struct llama_model_params { int32_t n_gpu_layers; // number of layers to store in VRAM enum llama_split_mode split_mode; // how to split the model across multiple GPUs From 4bd26644bf690ac482d9c60bced2228e8ad7948a Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 20 Apr 2024 13:24:58 +0200 Subject: [PATCH 10/13] llama: finally move the string KV override value to the stack --- common/common.cpp | 14 +------------- common/common.h | 3 --- examples/quantize/quantize.cpp | 4 ++-- llama.h | 2 +- 4 files changed, 4 insertions(+), 19 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index c30b9e59fecd4..f07b4d1a4e95b 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -266,7 +266,7 @@ bool parse_kv_override(const char * data, std::vector & } else if (strncmp(sep, "str:", 4) == 0) { sep += 4; kvo.tag = 
LLAMA_KV_OVERRIDE_TYPE_STR; - kvo.str_value = strdup(sep); + strncpy(kvo.str_value, sep, 128); } else { fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data); return false; @@ -2232,10 +2232,6 @@ std::tuple llama_init_from_gpt_par return std::make_tuple(nullptr, nullptr); } - if (!params.kv_overrides.empty()) { - llama_model_kv_override_free(params.kv_overrides.data()); - } - auto cparams = llama_context_params_from_gpt_params(params); llama_context * lctx = llama_new_context_with_model(model, cparams); @@ -2956,11 +2952,3 @@ llama_control_vector_data llama_control_vector_load(const std::vectorkey[0] != 0; p++) { - if (p->tag == LLAMA_KV_OVERRIDE_TYPE_STR) { - delete p->str_value; - } - } -} diff --git a/common/common.h b/common/common.h index 916023e2504a9..562d3a1195f65 100644 --- a/common/common.h +++ b/common/common.h @@ -172,9 +172,6 @@ struct gpt_params { bool parse_kv_override(const char * data, std::vector & overrides); -// Frees all allocated memory -void llama_model_kv_override_free(struct llama_model_kv_override * ctx); - bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params); bool gpt_params_parse(int argc, char ** argv, gpt_params & params); diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index a0d9a2f46f6f4..b6464be3dfd5d 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -305,14 +305,14 @@ int main(int argc, char ** argv) { llama_model_kv_override kvo; std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_FILE); kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; - kvo.str_value = strdup(imatrix_file.c_str()); + strncpy(kvo.str_value, imatrix_file.c_str(), 128); kv_overrides.emplace_back(std::move(kvo)); } if (!imatrix_dataset.empty()) { llama_model_kv_override kvo; std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET); kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; - kvo.str_value = strdup(imatrix_dataset.c_str()); + strncpy(kvo.str_value, imatrix_dataset.c_str(), 128); kv_overrides.emplace_back(std::move(kvo)); } diff --git a/llama.h b/llama.h index 0cc0d3f291428..73d9733f78cfa 100644 --- a/llama.h +++ b/llama.h @@ -201,11 +201,11 @@ extern "C" { struct llama_model_kv_override { char key[128]; enum llama_model_kv_override_type tag; + char str_value[128]; union { int64_t int_value; double float_value; bool bool_value; - char * str_value; }; }; From 5cf8ccb19191b829ce3701bf3a9d236457a2bf1a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 21 Apr 2024 20:06:30 +0300 Subject: [PATCH 11/13] llama : minor --- common/common.cpp | 14 ++++++------- examples/quantize/quantize.cpp | 8 ++++---- llama.cpp | 37 ++++++++++++---------------------- llama.h | 12 ++++++----- 4 files changed, 31 insertions(+), 40 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index f07b4d1a4e95b..277244e5ad0be 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -235,7 +235,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } bool parse_kv_override(const char * data, std::vector & overrides) { - const char* sep = strchr(data, '='); + const char * sep = strchr(data, '='); if (sep == nullptr || sep - data >= 128) { fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data); return false; @@ -247,18 +247,18 @@ bool parse_kv_override(const char * data, std::vector & if (strncmp(sep, "int:", 4) == 0) { sep += 4; kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; - kvo.int_value = std::atol(sep); + kvo.val_i64 = std::atol(sep); } else if (strncmp(sep, "float:", 6) == 0) { sep += 6; kvo.tag = 
LLAMA_KV_OVERRIDE_TYPE_FLOAT; - kvo.float_value = std::atof(sep); + kvo.val_f64 = std::atof(sep); } else if (strncmp(sep, "bool:", 5) == 0) { sep += 5; kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL; if (std::strcmp(sep, "true") == 0) { - kvo.bool_value = true; + kvo.val_bool = true; } else if (std::strcmp(sep, "false") == 0) { - kvo.bool_value = false; + kvo.val_bool = false; } else { fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data); return false; @@ -266,7 +266,7 @@ bool parse_kv_override(const char * data, std::vector & } else if (strncmp(sep, "str:", 4) == 0) { sep += 4; kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; - strncpy(kvo.str_value, sep, 128); + strncpy(kvo.val_str, sep, 128); } else { fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data); return false; @@ -276,7 +276,7 @@ bool parse_kv_override(const char * data, std::vector & } bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param) { - llama_sampling_params& sparams = params.sparams; + llama_sampling_params & sparams = params.sparams; if (arg == "-s" || arg == "--seed") { if (++i >= argc) { diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index b6464be3dfd5d..4419c04712c96 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -305,14 +305,14 @@ int main(int argc, char ** argv) { llama_model_kv_override kvo; std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_FILE); kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; - strncpy(kvo.str_value, imatrix_file.c_str(), 128); + strncpy(kvo.val_str, imatrix_file.c_str(), 128); kv_overrides.emplace_back(std::move(kvo)); } if (!imatrix_dataset.empty()) { llama_model_kv_override kvo; std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET); kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; - strncpy(kvo.str_value, imatrix_dataset.c_str(), 128); + strncpy(kvo.val_str, imatrix_dataset.c_str(), 128); kv_overrides.emplace_back(std::move(kvo)); } @@ -320,7 +320,7 @@ int main(int argc, char ** argv) { llama_model_kv_override kvo; std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES); kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; - kvo.int_value = imatrix_data.size(); + kvo.val_i64 = imatrix_data.size(); kv_overrides.emplace_back(std::move(kvo)); } @@ -328,7 +328,7 @@ int main(int argc, char ** argv) { llama_model_kv_override kvo; std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS); kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; - kvo.int_value = m_last_call; + kvo.val_i64 = m_last_call; kv_overrides.emplace_back(std::move(kvo)); } } diff --git a/llama.cpp b/llama.cpp index 9d9f7b4e12628..14c7f67416f1c 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2875,16 +2875,16 @@ namespace GGUFMeta { __func__, override_type_to_str(ovrd->tag), ovrd->key); switch (ovrd->tag) { case LLAMA_KV_OVERRIDE_TYPE_BOOL: { - LLAMA_LOG_INFO("%s\n", ovrd->bool_value ? "true" : "false"); + LLAMA_LOG_INFO("%s\n", ovrd->val_bool ? "true" : "false"); } break; case LLAMA_KV_OVERRIDE_TYPE_INT: { - LLAMA_LOG_INFO("%" PRId64 "\n", ovrd->int_value); + LLAMA_LOG_INFO("%" PRId64 "\n", ovrd->val_i64); } break; case LLAMA_KV_OVERRIDE_TYPE_FLOAT: { - LLAMA_LOG_INFO("%.6f\n", ovrd->float_value); + LLAMA_LOG_INFO("%.6f\n", ovrd->val_f64); } break; case LLAMA_KV_OVERRIDE_TYPE_STR: { - LLAMA_LOG_INFO("%s\n", ovrd->str_value); + LLAMA_LOG_INFO("%s\n", ovrd->val_str); } break; default: // Shouldn't be possible to end up here, but just in case... 
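                    // (the runtime_error thrown for unsupported tags follows unchanged)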
@@ -2903,7 +2903,7 @@ namespace GGUFMeta { static typename std::enable_if::value, bool>::type try_override(OT & target, const struct llama_model_kv_override * ovrd) { if (validate_override(LLAMA_KV_OVERRIDE_TYPE_BOOL, ovrd)) { - target = ovrd->bool_value; + target = ovrd->val_bool; return true; } return false; @@ -2913,7 +2913,7 @@ namespace GGUFMeta { static typename std::enable_if::value && std::is_integral::value, bool>::type try_override(OT & target, const struct llama_model_kv_override * ovrd) { if (validate_override(LLAMA_KV_OVERRIDE_TYPE_INT, ovrd)) { - target = ovrd->int_value; + target = ovrd->val_i64; return true; } return false; @@ -2923,33 +2923,22 @@ namespace GGUFMeta { static typename std::enable_if::value, bool>::type try_override(T & target, const struct llama_model_kv_override * ovrd) { if (validate_override(LLAMA_KV_OVERRIDE_TYPE_FLOAT, ovrd)) { - target = ovrd->float_value; + target = ovrd->val_f64; return true; } return false; } template - static typename std::enable_if::value, char *>::type + static typename std::enable_if::value, bool>::type try_override(T & target, const struct llama_model_kv_override * ovrd) { if (validate_override(LLAMA_KV_OVERRIDE_TYPE_STR, ovrd)) { - target = ovrd->str_value; + target = ovrd->val_str; return true; } return false; } - template - static typename std::enable_if::value, bool>::type - try_override(T & target, const struct llama_model_kv_override * ovrd) { - (void)target; - (void)ovrd; - if (!ovrd) { return false; } - // Currently, we should never end up here so it would be a bug if we do. - throw std::runtime_error(format("Unsupported attempt to override string type for metadata key %s\n", - ovrd ? ovrd->key : "NULL")); - } - static bool set(const gguf_context * ctx, const int k, T & target, const struct llama_model_kv_override * ovrd = nullptr) { if (try_override(target, ovrd)) { return true; @@ -14276,13 +14265,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s for (auto & o : overrides) { if (o.key[0] == 0) break; if (o.tag == LLAMA_KV_OVERRIDE_TYPE_FLOAT) { - gguf_set_val_f32(ctx_out, o.key, o.float_value); + gguf_set_val_f32(ctx_out, o.key, o.val_f64); } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_INT) { - gguf_set_val_i32(ctx_out, o.key, o.int_value); + gguf_set_val_i32(ctx_out, o.key, o.val_i64); } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_BOOL) { - gguf_set_val_bool(ctx_out, o.key, o.bool_value); + gguf_set_val_bool(ctx_out, o.key, o.val_bool); } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_STR) { - gguf_set_val_str(ctx_out, o.key, o.str_value); + gguf_set_val_str(ctx_out, o.key, o.val_str); } else { LLAMA_LOG_WARN("%s: unknown KV override type for key %s\n", __func__, o.key); } diff --git a/llama.h b/llama.h index 73d9733f78cfa..afe3c0466447d 100644 --- a/llama.h +++ b/llama.h @@ -199,13 +199,15 @@ extern "C" { }; struct llama_model_kv_override { - char key[128]; enum llama_model_kv_override_type tag; - char str_value[128]; + + char key[128]; + union { - int64_t int_value; - double float_value; - bool bool_value; + int64_t val_i64; + double val_f64; + bool val_bool; + char val_str[128]; }; }; From 8360e0c96049a81ef970e4db03b79106558ecfc7 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sun, 21 Apr 2024 21:00:34 +0200 Subject: [PATCH 12/13] =?UTF-8?q?no=20need=20to=20add=20a=20NUL=20to=20the?= =?UTF-8?q?=C2=A0std::vector,=C2=A0std::string=C2=A0can=20be=20initialized?= =?UTF-8?q?=20from=20a=20pair=20of=20iterators.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Co-authored-by: slaren --- examples/quantize/quantize.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 4419c04712c96..a934f699a857f 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -169,10 +169,9 @@ static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_ in.read((char *)&m_last_call, sizeof(m_last_call)); int dataset_len; in.read((char *)&dataset_len, sizeof(dataset_len)); - std::vector dataset_as_vec(dataset_len+1); - in.read((char *)dataset_as_vec.data(), dataset_len); - dataset_as_vec[dataset_len] = 0; - imatrix_dataset = std::string{dataset_as_vec.data()}; + std::vector dataset_as_vec(dataset_len); + in.read(dataset_as_vec.data(), dataset_len); + imatrix_dataset.assign(dataset_as_vec.begin(), dataset_as_vec.end()); printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str()); } printf("%s: loaded %d importance matrix entries from %s computed on %d chunks\n", __func__, int(imatrix_data.size()), imatrix_file.c_str(), m_last_call); From bcbdd28cfd42ac25b5753a1c5bd7e1f991f4b653 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Fri, 26 Apr 2024 12:10:39 +0200 Subject: [PATCH 13/13] kv override: ensure string termination --- common/common.cpp | 7 ++++++- examples/quantize/quantize.cpp | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index f64943f678f22..007864dc784aa 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -266,7 +266,12 @@ bool parse_kv_override(const char * data, std::vector & } else if (strncmp(sep, "str:", 4) == 0) { sep += 4; kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; - strncpy(kvo.val_str, sep, 128); + if (strlen(sep) > 127) { + fprintf(stderr, "%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data); + return false; + } + strncpy(kvo.val_str, sep, 127); + kvo.val_str[127] = '\0'; } else { fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data); return false; diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index e910c1e3a7fdc..432cc2b4feadf 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -307,14 +307,16 @@ int main(int argc, char ** argv) { llama_model_kv_override kvo; std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_FILE); kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; - strncpy(kvo.val_str, imatrix_file.c_str(), 128); + strncpy(kvo.val_str, imatrix_file.c_str(), 127); + kvo.val_str[127] = '\0'; kv_overrides.emplace_back(std::move(kvo)); } if (!imatrix_dataset.empty()) { llama_model_kv_override kvo; std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET); kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; - strncpy(kvo.val_str, imatrix_dataset.c_str(), 128); + strncpy(kvo.val_str, imatrix_dataset.c_str(), 127); + kvo.val_str[127] = '\0'; kv_overrides.emplace_back(std::move(kvo)); }
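
After this series, the imatrix binary layout written by save_imatrix() is: an int32 entry count; per entry an int32 name length, the name bytes, an int32 call count, an int32 value count, and the raw float values; then a trailer holding the int32 chunk count (m_last_call) and an int32-length-prefixed dataset name. The sketch below is a minimal standalone reader mirroring load_imatrix() in examples/quantize/quantize.cpp; it is illustrative only (the struct and helper names are not from the patches) and error handling is trimmed:

    #include <fstream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct imatrix_entry {
        int                ncall = 0; // number of times the tensor was evaluated
        std::vector<float> values;    // accumulated importance values
    };

    // illustrative reader for the post-series imatrix layout
    static bool read_imatrix(const char * fname,
                             std::unordered_map<std::string, imatrix_entry> & data,
                             std::string & dataset, int & n_chunks) {
        std::ifstream in(fname, std::ios::binary);
        if (!in) {
            return false;
        }
        int n_entries = 0;
        in.read((char *) &n_entries, sizeof(n_entries));
        for (int i = 0; i < n_entries && in; ++i) {
            int len = 0;
            in.read((char *) &len, sizeof(len));
            std::string name(len, '\0');
            in.read(&name[0], len);
            imatrix_entry e;
            in.read((char *) &e.ncall, sizeof(e.ncall));
            int nval = 0;
            in.read((char *) &nval, sizeof(nval));
            e.values.resize(nval);
            if (nval > 0) {
                in.read((char *) e.values.data(), nval * sizeof(float));
            }
            data.emplace(std::move(name), std::move(e));
        }
        // trailer added by this series; absent in files written by older builds
        n_chunks = 0;
        if (in.peek() != EOF) {
            in.read((char *) &n_chunks, sizeof(n_chunks));
            int dataset_len = 0;
            in.read((char *) &dataset_len, sizeof(dataset_len));
            std::vector<char> buf(dataset_len);
            in.read(buf.data(), dataset_len);
            dataset.assign(buf.begin(), buf.end());
        }
        return bool(in);
    }

Once a model is quantized with such a file, the metadata lands under the quantize.imatrix.file, quantize.imatrix.dataset, quantize.imatrix.entries_count and quantize.imatrix.chunks_count keys and can be inspected with any GGUF metadata dumper.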