Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions ggml/include/gguf.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,24 @@ extern "C" {
struct ggml_context ** ctx;
};

// callback to simulate or wrap a FILE pointer - read up to `len` bytes at `offset` into `output` and return the number of bytes read
//   userdata: opaque pointer that is handed back unchanged on every call
//   output:   destination for the bytes that were read
//   offset:   position in the data source to read from, it is passed explicitly on every call
//   len:      number of bytes requested
// a return value different from `len` is treated by the GGUF reader as the end of the data and makes the pending read fail
typedef size_t (*gguf_reader_callback_t)(void * userdata, void * output, uint64_t offset, size_t len);

GGML_API struct gguf_context * gguf_init_empty(void);
// reads GGUF data starting at the current position of `file`, returns NULL if `file` is NULL or its position cannot be determined
GGML_API struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params);
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
// reads GGUF data from the `size` bytes at `data`, returns NULL if `data` is NULL or `size` is 0
GGML_API struct gguf_context * gguf_init_from_buffer(const void * data, size_t size, struct gguf_init_params params);

// reads GGUF data through `callback`, starting at offset 0, `userdata` is passed through to every callback invocation
// max_chunk_read is the maximum number of bytes that the GGUF code will read at once from the callback, a value of 0 means no limit
// max_expected_size is the total number of bytes the GGUF code may consume, a read that would go past this limit fails
// returns NULL if `callback` is NULL
GGML_API struct gguf_context * gguf_init_from_callback(gguf_reader_callback_t callback, void * userdata, size_t max_chunk_read, uint64_t max_expected_size, struct gguf_init_params params);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
GGML_API struct gguf_context * gguf_init_from_callback(gguf_reader_callback_t callback, void * userdata, size_t max_chunk_read, uint64_t max_expected_size, struct gguf_init_params params);
// max_chunk_read is the maximum number of bytes that the GGUF code will read at once from the callback, a value of 0 means no limit
GGML_API struct gguf_context * gguf_init_from_callback(gguf_reader_callback_t callback, void * userdata, size_t max_chunk_read, uint64_t max_expected_size, struct gguf_init_params params);

I suggest a small change to the spec: if the user passes 0, we internally set the value to SIZE_MAX, which makes the code a bit more ergonomic.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.
I'd love it if you could review #22566 next, after this PR is merged.


GGML_API void gguf_free(struct gguf_context * ctx);

GGML_API const char * gguf_type_name(enum gguf_type type);

GGML_API uint32_t gguf_get_version (const struct gguf_context * ctx);
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx); // padded to gguf_get_alignment if and only if the gguf_context contains at least one tensor

GGML_API int64_t gguf_get_n_kv(const struct gguf_context * ctx);
GGML_API int64_t gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found
Expand Down
178 changes: 155 additions & 23 deletions ggml/src/gguf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,18 @@ struct gguf_context {
};

struct gguf_reader {
gguf_reader(FILE * file) : file(file) {
// read the remaining bytes once and update on each read
nbytes_remain = file_remain(file);
gguf_reader(
gguf_reader_callback_t callback,
void * userdata,
size_t max_chunk_read,
uint64_t data_offset = 0,
uint64_t nbytes_remain = 0)
: callback(callback),
userdata(userdata),
max_chunk_read(max_chunk_read),
data_offset(data_offset),
nbytes_remain(nbytes_remain) {
Comment thread
JohannesGaessler marked this conversation as resolved.
GGML_ASSERT(max_chunk_read > 0);
}

// helper for remaining bytes in a file
Expand All @@ -257,12 +266,10 @@ struct gguf_reader {
template <typename T>
bool read(T & dst) const {
const size_t size = sizeof(dst);
if (nbytes_remain < size) {
if (size > nbytes_remain) {
return false;
}
const size_t nread = fread(&dst, 1, size, file);
nbytes_remain -= nread;
return nread == size;
return read_raw(&dst, size) == size;
}

template <typename T>
Expand Down Expand Up @@ -344,24 +351,71 @@ struct gguf_reader {
return false;
}
dst.resize(static_cast<size_t>(size));
const size_t nread = fread(dst.data(), 1, size, file);
nbytes_remain -= nread;
return nread == size;
return read_raw(dst.data(), static_cast<size_t>(size)) == size;
}

bool read(void * dst, const size_t size) const {
if (size > nbytes_remain) {
return false;
}
const size_t nread = fread(dst, 1, size, file);
nbytes_remain -= nread;
return nread == size;
return read_raw(dst, size) == size;
}

uint64_t tell() const {
return data_offset;
}

bool seek(uint64_t absolute_offset) const {
const uint64_t end_offset = uint64_t(data_offset) + nbytes_remain;
if (absolute_offset > end_offset) {
return false;
}

data_offset = absolute_offset;
nbytes_remain = end_offset - absolute_offset;

return true;
}

private:
FILE * file;
size_t read_raw(void * dst, size_t size) const {
if (callback == nullptr || size == 0) {
return 0;
}

uint8_t * data = static_cast<uint8_t *>(dst);
size_t total_nread = 0;
bool reached_eof = false;

mutable uint64_t nbytes_remain;
while (total_nread < size) {
const size_t chunk_size = std::min(max_chunk_read, size - total_nread);
if (data_offset + total_nread < data_offset) {
break;
}
const size_t nread = callback(userdata, static_cast<void *>(data + total_nread), data_offset + total_nread, chunk_size);
total_nread += nread;
if (nread != chunk_size) {
reached_eof = true;
break;
}
}

data_offset += total_nread;
GGML_ASSERT(total_nread <= nbytes_remain);
nbytes_remain -= total_nread;

if (reached_eof) {
nbytes_remain = 0;
}

return total_nread;
}

gguf_reader_callback_t callback = nullptr;
void * userdata = nullptr;
size_t max_chunk_read = 0;
mutable uint64_t data_offset = 0;
mutable uint64_t nbytes_remain = 0;
};

struct gguf_context * gguf_init_empty(void) {
Expand Down Expand Up @@ -394,12 +448,7 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
return true;
}

struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) {
if (!file) {
return nullptr;
}

const struct gguf_reader gr(file);
static struct gguf_context * gguf_init_from_reader(const struct gguf_reader & gr, struct gguf_init_params params) {
struct gguf_context * ctx = new gguf_context;

bool ok = true;
Expand Down Expand Up @@ -700,14 +749,14 @@ struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_para
GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);

// we require the data section to be aligned, so take into account any padding
if (gguf_fseek(file, GGML_PAD(gguf_ftell(file), ctx->alignment), SEEK_SET) != 0) {
if (n_tensors > 0 && !gr.seek(GGML_PAD(gr.tell(), ctx->alignment))) {
GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
gguf_free(ctx);
return nullptr;
}

// store the current file offset - this is where the data section starts
ctx->offset = gguf_ftell(file);
ctx->offset = gr.tell();

// compute the total size of the data section, taking into account the alignment
{
Expand Down Expand Up @@ -844,6 +893,89 @@ struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_para
return ctx;
}

// Builds a gguf_context by pulling the GGUF data through a user-supplied read callback.
// Returns nullptr when no callback is given, otherwise whatever gguf_init_from_reader returns.
struct gguf_context * gguf_init_from_callback(gguf_reader_callback_t callback, void * userdata, size_t max_chunk_read, uint64_t max_expected_size, struct gguf_init_params params) {
    if (!callback) {
        return nullptr;
    }

    // a chunk limit of 0 means "no limit", the reader itself requires a value > 0
    size_t chunk_limit = max_chunk_read;
    if (chunk_limit == 0) {
        chunk_limit = SIZE_MAX;
    }

    // reading starts at offset 0 and may consume at most max_expected_size bytes
    const struct gguf_reader reader(callback, userdata, chunk_limit, /*data_offset =*/ 0, /*nbytes_remain =*/ max_expected_size);
    return gguf_init_from_reader(reader, params);
}

// state shared between gguf_init_from_file_ptr and gguf_file_reader_callback
struct gguf_file_reader {
FILE * file; // stream the callback reads from
uint64_t offset; // position the callback expects `file` to be at, updated after every seek and read
};

static size_t gguf_file_reader_callback(void * userdata, void * output, uint64_t offset, size_t len) {
Comment thread
JohannesGaessler marked this conversation as resolved.
GGML_ASSERT(len > 0);

gguf_file_reader & reader = *static_cast<gguf_file_reader *>(userdata);

if (reader.offset != offset) {
if (offset > INT64_MAX || gguf_fseek(reader.file, static_cast<int64_t>(offset), SEEK_SET) != 0) {
return 0;
}

reader.offset = offset;
}

const size_t nread = fread(static_cast<uint8_t *>(output), 1, len, reader.file);
reader.offset += nread;
return nread;
}

struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) {
if (!file) {
return nullptr;
}

const int64_t cur = gguf_ftell(file);
if (cur < 0) {
return nullptr;
}

gguf_file_reader reader = {
/*.file = */ file,
/*.offset = */ static_cast<uint64_t>(cur),
};
const struct gguf_reader gr(gguf_file_reader_callback, &reader, SIZE_MAX, reader.offset, gguf_reader::file_remain(file));
return gguf_init_from_reader(gr, params);
}

// read-only view of an in-memory buffer, used as userdata for gguf_buffer_reader_callback
struct gguf_buffer_reader {
const uint8_t * data; // first byte of the buffer
size_t size; // number of bytes available at `data`
};

// gguf_reader_callback_t adapter over an in-memory buffer, `userdata` must point to a gguf_buffer_reader.
// Copies up to `len` bytes starting at `offset` into `output` and returns the number of bytes copied.
// A request that runs past the end of the buffer is truncated to the bytes that are available (short read),
// a request that starts at or past the end copies nothing and returns 0.
static size_t gguf_buffer_reader_callback(void * userdata, void * output, uint64_t offset, size_t len) {
    GGML_ASSERT(len > 0);

    const gguf_buffer_reader & reader = *static_cast<gguf_buffer_reader *>(userdata);

    if (offset >= reader.size) {
        return 0;
    }

    // offset < reader.size <= SIZE_MAX, so the narrowing cast cannot truncate and the subtraction cannot wrap
    const size_t data_offset = static_cast<size_t>(offset);
    const size_t nread       = std::min(len, reader.size - data_offset);
    memcpy(output, reader.data + data_offset, nread);
    return nread;
}

struct gguf_context * gguf_init_from_buffer(const void * data, size_t size, struct gguf_init_params params) {
if (data == nullptr || size == 0) {
return nullptr;
}

gguf_buffer_reader reader = {
/*.data = */ static_cast<const uint8_t *>(data),
/*.size = */ size,
};
const struct gguf_reader gr(gguf_buffer_reader_callback, &reader, SIZE_MAX, 0, size);
return gguf_init_from_reader(gr, params);
}

struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
FILE * file = ggml_fopen(fname, "rb");

Expand Down
90 changes: 85 additions & 5 deletions tests/test-gguf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,42 @@ static void helper_write(FILE * file, const void * data, const size_t nbytes) {
GGML_ASSERT(fwrite(data, 1, nbytes, file) == nbytes);
}

// Returns the complete contents of `file` as a byte vector.
// The stream is rewound to its beginning before returning; any failure aborts via GGML_ASSERT.
static std::vector<uint8_t> read_file_to_buffer(FILE * file) {
    GGML_ASSERT(file != nullptr);

    // determine the file size by seeking to the end
    GGML_ASSERT(fseek(file, 0, SEEK_END) == 0);
    const long end_pos = ftell(file);
    GGML_ASSERT(end_pos >= 0);
    rewind(file);

    const size_t nbytes = static_cast<size_t>(end_pos);
    std::vector<uint8_t> contents(nbytes);
    GGML_ASSERT(fread(contents.data(), 1, nbytes, file) == nbytes);

    // leave the stream at the start for whoever uses it next
    rewind(file);
    return contents;
}

// read-only view of an in-memory buffer, used as userdata for read_buffer_callback
struct callback_reader_data {
const uint8_t * data; // first byte of the buffer
size_t size; // number of bytes available at `data`
};

// gguf_reader_callback_t implementation for the tests, `userdata` must point to a callback_reader_data.
// Copies up to `len` bytes starting at `offset` into `output` and returns the number of bytes copied.
// A request that runs past the end of the buffer is truncated to the bytes that are available (short read),
// a request that starts at or past the end copies nothing and returns 0.
static size_t read_buffer_callback(void * userdata, void * output, uint64_t offset, size_t len) {
    GGML_ASSERT(len > 0);

    const callback_reader_data & reader = *static_cast<callback_reader_data *>(userdata);

    if (offset >= reader.size) {
        return 0;
    }

    // offset < reader.size <= SIZE_MAX, so the narrowing cast cannot truncate and the subtraction cannot wrap
    const size_t data_offset = static_cast<size_t>(offset);
    const size_t nread       = std::min(len, reader.size - data_offset);
    memcpy(output, reader.data + data_offset, nread);
    return nread;
}

static FILE * get_handcrafted_file(const unsigned int seed, const enum handcrafted_file_type hft, const int extra_bytes = 0) {
FILE * file = tmpfile();

Expand Down Expand Up @@ -1095,10 +1131,29 @@ static bool same_tensor_data(const struct ggml_context * orig, const struct ggml
return ok;
}

static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned int seed, const bool only_meta) {
// selects which gguf_init_* entry point test_roundtrip uses to read the GGUF data back
enum roundtrip_read_mode {
ROUNDTRIP_READ_MODE_FILE, // gguf_init_from_file_ptr
ROUNDTRIP_READ_MODE_BUFFER, // gguf_init_from_buffer
ROUNDTRIP_READ_MODE_CALLBACK, // gguf_init_from_callback
};

// Maps a roundtrip_read_mode to the short label printed in the test log.
// Aborts for a value that is not one of the enumerators.
static const char * roundtrip_read_mode_name(const roundtrip_read_mode mode) {
    if (mode == ROUNDTRIP_READ_MODE_FILE) {
        return "file";
    }
    if (mode == ROUNDTRIP_READ_MODE_BUFFER) {
        return "buffer";
    }
    if (mode == ROUNDTRIP_READ_MODE_CALLBACK) {
        return "callback";
    }

    GGML_ABORT("fatal error");
}

static std::pair<int, int> test_roundtrip(
ggml_backend_dev_t dev, const unsigned int seed, const bool only_meta,
const roundtrip_read_mode read_mode) {
ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr);
printf("%s: device=%s, backend=%s, only_meta=%s\n",
__func__, ggml_backend_dev_description(dev), ggml_backend_name(backend), only_meta ? "yes" : "no");
printf("%s: device=%s, backend=%s, only_meta=%s, read_mode=%s\n",
__func__, ggml_backend_dev_description(dev), ggml_backend_name(backend),
only_meta ? "yes" : "no", roundtrip_read_mode_name(read_mode));

int npass = 0;
int ntest = 0;
Expand Down Expand Up @@ -1133,7 +1188,22 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned
/*no_alloc =*/ false,
/*ctx =*/ only_meta ? nullptr : &ctx_1,
};
struct gguf_context * gguf_ctx_1 = gguf_init_from_file_ptr(file, gguf_params);
struct gguf_context * gguf_ctx_1 = nullptr;
const std::vector<uint8_t> data = read_mode == ROUNDTRIP_READ_MODE_FILE
? std::vector<uint8_t>()
: read_file_to_buffer(file);

if (read_mode == ROUNDTRIP_READ_MODE_BUFFER) {
gguf_ctx_1 = gguf_init_from_buffer(data.data(), data.size(), gguf_params);
} else if (read_mode == ROUNDTRIP_READ_MODE_CALLBACK) {
callback_reader_data reader = {
/*.data = */ data.data(),
/*.size = */ data.size(),
};
gguf_ctx_1 = gguf_init_from_callback(read_buffer_callback, &reader, 4096, 4ull << 30 /* 4GB */, gguf_params);
} else {
gguf_ctx_1 = gguf_init_from_file_ptr(file, gguf_params);
}

printf("%s: same_version: ", __func__);
if (gguf_get_version(gguf_ctx_0) == gguf_get_version(gguf_ctx_1)) {
Expand Down Expand Up @@ -1343,7 +1413,17 @@ int main(int argc, char ** argv) {
ggml_backend_dev_t dev = ggml_backend_dev_get(i);

for (bool only_meta : {true, false}) {
std::pair<int, int> result = test_roundtrip(dev, seed, only_meta);
std::pair<int, int> result = test_roundtrip(dev, seed, only_meta, ROUNDTRIP_READ_MODE_FILE);
npass += result.first;
ntest += result.second;
}
{
std::pair<int, int> result = test_roundtrip(dev, seed, /*only_meta=*/false, ROUNDTRIP_READ_MODE_BUFFER);
npass += result.first;
ntest += result.second;
}
{
std::pair<int, int> result = test_roundtrip(dev, seed, /*only_meta=*/false, ROUNDTRIP_READ_MODE_CALLBACK);
npass += result.first;
ntest += result.second;
}
Expand Down
Loading