-
Notifications
You must be signed in to change notification settings - Fork 19.8k
ggml: gguf_init_from_callback and gguf_init_from_buffer
#22341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
9b2aa8c
de9ebec
59883b3
5950daa
b120c9b
65f7136
0a59d4e
62f4e95
78bff38
dcca71d
6248d3c
913a28f
7157cc0
5908c58
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -233,6 +233,10 @@ struct gguf_reader { | |||||
| nbytes_remain = file_remain(file); | ||||||
| } | ||||||
|
|
||||||
| gguf_reader(const void * data, size_t size) | ||||||
| : data(static_cast<const uint8_t *>(data)), nbytes_remain(size) { | ||||||
| } | ||||||
|
|
||||||
| // helper for remaining bytes in a file | ||||||
| static uint64_t file_remain(FILE * file) { | ||||||
| const int64_t cur = gguf_ftell(file); | ||||||
|
|
@@ -260,7 +264,7 @@ struct gguf_reader { | |||||
| if (nbytes_remain < size) { | ||||||
| return false; | ||||||
| } | ||||||
| const size_t nread = fread(&dst, 1, size, file); | ||||||
| const size_t nread = read_raw(&dst, size); | ||||||
| nbytes_remain -= nread; | ||||||
| return nread == size; | ||||||
| } | ||||||
|
|
@@ -344,7 +348,7 @@ struct gguf_reader { | |||||
| return false; | ||||||
| } | ||||||
| dst.resize(static_cast<size_t>(size)); | ||||||
| const size_t nread = fread(dst.data(), 1, size, file); | ||||||
| const size_t nread = read_raw(dst.data(), static_cast<size_t>(size)); | ||||||
| nbytes_remain -= nread; | ||||||
| return nread == size; | ||||||
| } | ||||||
|
|
@@ -353,14 +357,64 @@ struct gguf_reader { | |||||
| if (size > nbytes_remain) { | ||||||
| return false; | ||||||
| } | ||||||
| const size_t nread = fread(dst, 1, size, file); | ||||||
| const size_t nread = read_raw(dst, size); | ||||||
| nbytes_remain -= nread; | ||||||
| return nread == size; | ||||||
| } | ||||||
|
|
||||||
| uint64_t tell() const { | ||||||
| if (file != nullptr) { | ||||||
| const int64_t cur = gguf_ftell(file); | ||||||
| return cur < 0 | ||||||
| ? 0 | ||||||
| : static_cast<uint64_t>(cur); | ||||||
| } | ||||||
|
|
||||||
| return data_offset; | ||||||
| } | ||||||
|
|
||||||
| bool seek(uint64_t absolute_offset) const { | ||||||
| if (file != nullptr) { | ||||||
| const int64_t cur = gguf_ftell(file); | ||||||
| const uint64_t end_offset = cur < 0 | ||||||
| ? nbytes_remain | ||||||
| : static_cast<uint64_t>(cur) + nbytes_remain; | ||||||
|
|
||||||
| if (absolute_offset > end_offset || gguf_fseek(file, absolute_offset, SEEK_SET) != 0) { | ||||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| nbytes_remain = end_offset - absolute_offset; | ||||||
| } else { | ||||||
| const uint64_t end_offset = data_offset + nbytes_remain; | ||||||
| if (absolute_offset > end_offset) { | ||||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| data_offset = static_cast<size_t>(absolute_offset); | ||||||
| nbytes_remain = end_offset - absolute_offset; | ||||||
| } | ||||||
|
|
||||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| private: | ||||||
| FILE * file; | ||||||
| size_t read_raw(void * dst, size_t size) const { | ||||||
| if (file != nullptr) { | ||||||
| return fread(dst, 1, size, file); | ||||||
| } else if (data == nullptr || size > nbytes_remain || data_offset + size < data_offset) { | ||||||
| return 0; | ||||||
| } | ||||||
|
|
||||||
| memcpy(dst, data + data_offset, size); | ||||||
| data_offset += size; | ||||||
| return size; | ||||||
| } | ||||||
|
|
||||||
| FILE * file = nullptr; | ||||||
| const uint8_t * data = nullptr; | ||||||
|
|
||||||
| mutable size_t data_offset = 0; | ||||||
| mutable uint64_t nbytes_remain; | ||||||
| }; | ||||||
|
|
||||||
|
|
@@ -394,12 +448,7 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct | |||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) { | ||||||
| if (!file) { | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| const struct gguf_reader gr(file); | ||||||
| static struct gguf_context * gguf_init_from_reader(const struct gguf_reader & gr, struct gguf_init_params params) { | ||||||
| struct gguf_context * ctx = new gguf_context; | ||||||
|
|
||||||
| bool ok = true; | ||||||
|
|
@@ -700,14 +749,14 @@ struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_para | |||||
| GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors); | ||||||
|
|
||||||
| // we require the data section to be aligned, so take into account any padding | ||||||
| if (gguf_fseek(file, GGML_PAD(gguf_ftell(file), ctx->alignment), SEEK_SET) != 0) { | ||||||
| if (!gr.seek(GGML_PAD(gr.tell(), ctx->alignment))) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
The CI is failing on the edge case of a GGUF file with 0 tensors, in which case the file size is not padded. I would say to just handle that edge case like this.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||||||
| GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__); | ||||||
| gguf_free(ctx); | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| // store the current file offset - this is where the data section starts | ||||||
| ctx->offset = gguf_ftell(file); | ||||||
| ctx->offset = gr.tell(); | ||||||
|
|
||||||
| // compute the total size of the data section, taking into account the alignment | ||||||
| { | ||||||
|
|
@@ -844,6 +893,24 @@ struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_para | |||||
| return ctx; | ||||||
| } | ||||||
|
|
||||||
| struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) { | ||||||
| if (!file) { | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| const struct gguf_reader gr(file); | ||||||
| return gguf_init_from_reader(gr, params); | ||||||
| } | ||||||
|
|
||||||
| struct gguf_context * gguf_init_from_buffer(const void * data, size_t size, struct gguf_init_params params) { | ||||||
| if (data == nullptr || size == 0) { | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| const struct gguf_reader gr(data, size); | ||||||
| return gguf_init_from_reader(gr, params); | ||||||
| } | ||||||
|
|
||||||
| struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { | ||||||
| FILE * file = ggml_fopen(fname, "rb"); | ||||||
|
|
||||||
|
|
||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are you changing the model loader?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When loading a model using
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A bugfix like that should be its own PR; remove the model loader changes from this one.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. Will open a new PR once this one gets merged. Update: opened #22566 |
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove this file, the test cases in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added this file to validate the fix I made in If you remove the changes I made in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I removed this file and will introduce it back in the next PR with the fixes to |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| #include "ggml-backend.h" | ||
| #include "get-model.h" | ||
| #include "llama.h" | ||
| #include "gguf.h" | ||
|
|
||
| #include "../src/llama-model.h" | ||
|
|
||
| #include <cstdint> | ||
| #include <cstdio> | ||
| #include <cstdlib> | ||
| #include <vector> | ||
|
|
||
// Reads the entire contents of an open stream into memory.
// Returns an empty vector if the stream is null, cannot be sized or
// repositioned, or cannot be read in full. An empty file also yields an
// empty vector, so callers cannot tell it apart from a failure.
static std::vector<uint8_t> read_file_to_buffer(FILE * file) {
    if (file == nullptr || fseek(file, 0, SEEK_END) != 0) {
        return {};
    }

    const long size = ftell(file);
    if (size < 0) {
        return {};
    }

    // fseek instead of rewind: rewind gives no indication of failure
    if (fseek(file, 0, SEEK_SET) != 0) {
        return {};
    }

    std::vector<uint8_t> data(static_cast<size_t>(size));
    if (!data.empty() && fread(data.data(), 1, data.size(), file) != data.size()) {
        return {};
    }

    return data;
}
|
|
||
// Tensor-data callback that deliberately does nothing; main passes it to
// llama_model_init_from_user.
static void set_tensor_data_noop(struct ggml_tensor * tensor, void * userdata) {
    (void) tensor;
    (void) userdata;
}
|
|
||
| int main(int argc, char * argv[]) { | ||
| char * model_path = get_model_or_exit(argc, argv); | ||
| FILE * file = fopen(model_path, "rb"); | ||
| if (file == nullptr) { | ||
| fprintf(stderr, "failed to open model at '%s'\n", model_path); | ||
| return EXIT_FAILURE; | ||
| } | ||
|
|
||
| const std::vector<uint8_t> data = read_file_to_buffer(file); | ||
| fclose(file); | ||
| if (data.empty()) { | ||
| fprintf(stderr, "failed to read model at '%s'\n", model_path); | ||
| return EXIT_FAILURE; | ||
| } | ||
|
|
||
| llama_backend_init(); | ||
|
|
||
| ggml_backend_dev_t cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); | ||
| if (cpu_dev == nullptr) { | ||
| llama_backend_free(); | ||
| return EXIT_FAILURE; | ||
| } | ||
|
|
||
| ggml_backend_dev_t devices[] = { cpu_dev, nullptr }; | ||
|
|
||
| llama_model_params model_params = llama_model_default_params(); | ||
| model_params.devices = devices; | ||
| model_params.no_alloc = true; | ||
| model_params.use_mmap = false; | ||
| model_params.progress_callback = [](float progress, void * user_data) { | ||
| GGML_UNUSED(progress); | ||
| GGML_UNUSED(user_data); | ||
| return true; | ||
| }; | ||
|
|
||
| gguf_init_params gguf_params = { | ||
| /*.no_alloc = */ true, | ||
| /*.ctx = */ nullptr, | ||
| }; | ||
| gguf_context * gguf_ctx = gguf_init_from_buffer(data.data(), data.size(), gguf_params); | ||
| if (gguf_ctx == nullptr || gguf_get_n_tensors(gguf_ctx) <= 0) { | ||
| gguf_free(gguf_ctx); | ||
| llama_backend_free(); | ||
| return EXIT_FAILURE; | ||
| } | ||
|
|
||
| llama_model * model_from_file = llama_model_load_from_file(model_path, model_params); | ||
| if (model_from_file == nullptr) { | ||
| gguf_free(gguf_ctx); | ||
| llama_backend_free(); | ||
| return EXIT_FAILURE; | ||
| } | ||
|
|
||
| llama_model * model_from_buffer = llama_model_init_from_user(gguf_ctx, set_tensor_data_noop, nullptr, model_params); | ||
| if (model_from_buffer == nullptr) { | ||
| llama_model_free(model_from_file); | ||
| gguf_free(gguf_ctx); | ||
| llama_backend_free(); | ||
| return EXIT_FAILURE; | ||
| } | ||
|
|
||
| const auto mb_from_file = model_from_file->memory_breakdown(); | ||
| const auto mb_from_buffer = model_from_buffer->memory_breakdown(); | ||
| const bool ok = !mb_from_file.empty() && mb_from_file == mb_from_buffer; | ||
|
|
||
| llama_model_free(model_from_buffer); | ||
| llama_model_free(model_from_file); | ||
| gguf_free(gguf_ctx); | ||
| llama_backend_free(); | ||
|
|
||
| return ok ? EXIT_SUCCESS : EXIT_FAILURE; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The last check makes no sense to me. Is it a bug?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I meant to check for an overflow there since both `data_offset` and `size` are unsigned.