Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ build-sanitize-thread/
build-cov/
build-ci-debug/
build-ci-release/
build-cublas/
out/
tmp/
models/
Expand Down
10 changes: 6 additions & 4 deletions examples/gpt-2/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,11 @@ struct gpt2_model {

//
struct ggml_context * ctx;
ggml_backend_t backend = NULL;
ggml_backend_buffer_t buffer_w;
ggml_backend_buffer_t buffer_kv;
struct ggml_backend * backend = NULL;

struct ggml_backend_buffer * buffer_w;
struct ggml_backend_buffer * buffer_kv;

std::map<std::string, struct ggml_tensor *> tensors;
};

Expand Down Expand Up @@ -826,7 +828,7 @@ int main(int argc, char ** argv) {
}

// keep this buffer alive while evaluating the model
ggml_backend_buffer_t buf_compute;
struct ggml_backend_buffer * buf_compute;

struct ggml_allocr * allocr = NULL;
// allocate the compute buffer
Expand Down
12 changes: 6 additions & 6 deletions include/ggml/ggml-alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ GGML_API struct ggml_allocr * ggml_allocr_new_from_buffer(struct ggml_backend_bu
// you should call this if your graph is optimized to execute out-of-order
GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, const int * list, int n);

GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
GGML_API void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor);
GGML_API void ggml_allocr_free (struct ggml_allocr * alloc);
GGML_API bool ggml_allocr_is_measure (struct ggml_allocr * alloc);
GGML_API void ggml_allocr_reset (struct ggml_allocr * alloc);
GGML_API void ggml_allocr_alloc (struct ggml_allocr * alloc, struct ggml_tensor * tensor);
GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);
GGML_API size_t ggml_allocr_max_size(struct ggml_allocr * alloc);
GGML_API size_t ggml_allocr_max_size (struct ggml_allocr * alloc);

GGML_API size_t ggml_allocr_alloc_graph_n(
struct ggml_allocr * alloc,
struct ggml_cgraph ** graphs, int n_graphs,
struct ggml_tensor *** inputs, struct ggml_tensor *** outputs);


#ifdef __cplusplus
}
#endif
149 changes: 89 additions & 60 deletions include/ggml/ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,102 +5,131 @@
#ifdef __cplusplus
extern "C" {
#endif
typedef struct ggml_backend_s * ggml_backend_t;

// backend buffer
struct ggml_backend;
struct ggml_backend_buffer;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef void * ggml_buffer_context_t;

struct ggml_backend_buffer_interface {
void (*free_buffer) (ggml_backend_buffer_t buffer);
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
// type-erased backend-specific types / wrappers
typedef void * ggml_backend_context_t;
typedef void * ggml_backend_graph_plan_t;
typedef void * ggml_backend_buffer_context_t;

//
// backend buffer
//

struct ggml_backend_buffer_i {
void (*free_buffer) (struct ggml_backend_buffer * buffer);
void * (*get_base) (struct ggml_backend_buffer * buffer); // get base pointer
size_t (*get_alloc_size)(struct ggml_backend_buffer * buffer, struct ggml_tensor * tensor); // pre-allocation callback
void (*init_tensor) (struct ggml_backend_buffer * buffer, struct ggml_tensor * tensor); // post-allocation callback
void (*free_tensor) (struct ggml_backend_buffer * buffer, struct ggml_tensor * tensor); // pre-free callback
};

struct ggml_backend_buffer {
struct ggml_backend_buffer_interface interface;
ggml_backend_t backend;
ggml_buffer_context_t context;
struct ggml_backend * backend;

struct ggml_backend_buffer_i interface;

ggml_backend_buffer_context_t context;

size_t size;
};

// backend buffer functions
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(struct ggml_backend_buffer_interface interface, ggml_backend_t backend, ggml_buffer_context_t context, size_t size);
GGML_API void ggml_backend_buffer_free(ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer);
GGML_API void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API void ggml_backend_buffer_free_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);

GGML_API struct ggml_backend_buffer * ggml_backend_buffer_init(
struct ggml_backend * backend,
struct ggml_backend_buffer_i interface,
ggml_backend_buffer_context_t context,
size_t size);

GGML_API void ggml_backend_buffer_free (struct ggml_backend_buffer * buffer);
GGML_API size_t ggml_backend_buffer_get_alignment (struct ggml_backend_buffer * buffer);
GGML_API void * ggml_backend_buffer_get_base (struct ggml_backend_buffer * buffer);
GGML_API size_t ggml_backend_buffer_get_size (struct ggml_backend_buffer * buffer);
GGML_API size_t ggml_backend_buffer_get_alloc_size(struct ggml_backend_buffer * buffer, struct ggml_tensor * tensor);
GGML_API void ggml_backend_buffer_init_tensor (struct ggml_backend_buffer * buffer, struct ggml_tensor * tensor);
GGML_API void ggml_backend_buffer_free_tensor (struct ggml_backend_buffer * buffer, struct ggml_tensor * tensor);

//
// backend
typedef void * ggml_backend_context_t;
typedef void * ggml_graph_plan_t;
//

struct ggml_backend_interface {
const char * (*get_name)(ggml_backend_t backend);
struct ggml_backend_i {
const char * (*get_name)(struct ggml_backend * backend);

void (*free)(ggml_backend_t backend);
void (*free)(struct ggml_backend * backend);

// buffer allocation
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
size_t (*get_alignment)(ggml_backend_t backend);
struct ggml_backend_buffer * (*alloc_buffer)(struct ggml_backend * backend, size_t size);

// get buffer alignment
size_t (*get_alignment)(struct ggml_backend * backend);

// tensor data access
// these functions can be asynchronous; helper functions are provided for synchronous access that automatically call synchronize
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
void (*synchronize) (ggml_backend_t backend);
void (*set_tensor_async)(struct ggml_backend * backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*get_tensor_async)(struct ggml_backend * backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
void (*synchronize) (struct ggml_backend * backend);

// (optional) copy tensor between different backends, allowing for single-copy transfers
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*cpy_tensor_from)(struct ggml_backend * backend, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*cpy_tensor_to) (struct ggml_backend * backend, struct ggml_tensor * src, struct ggml_tensor * dst);

// compute graph with a plan
ggml_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
void (*graph_plan_free) (ggml_backend_t backend, ggml_graph_plan_t plan);
void (*graph_plan_compute)(ggml_backend_t backend, ggml_graph_plan_t plan);
ggml_backend_graph_plan_t (*graph_plan_create) (struct ggml_backend * backend, struct ggml_cgraph * cgraph);
void (*graph_plan_free) (struct ggml_backend * backend, ggml_backend_graph_plan_t plan);
void (*graph_plan_compute)(struct ggml_backend * backend, ggml_backend_graph_plan_t plan);

// compute graph without a plan
void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
void (*graph_compute)(struct ggml_backend * backend, struct ggml_cgraph * cgraph);

// check if the backend supports an operation
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
bool (*supports_op)(struct ggml_backend * backend, const struct ggml_tensor * op);
};

struct ggml_backend_s {
struct ggml_backend_interface interface;
struct ggml_backend {
struct ggml_backend_i interface;

ggml_backend_context_t context;
};

// backend helper functions
static inline ggml_backend_t get_backend(const struct ggml_tensor * tensor) { return tensor->buffer->backend; }

static inline const char * ggml_backend_name(ggml_backend_t backend) { return backend->interface.get_name(backend); }
static inline void ggml_backend_free(ggml_backend_t backend) { backend->interface.free(backend); }
static inline ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size) { return backend->interface.alloc_buffer(backend, size); }
static inline size_t ggml_backend_get_alignment(ggml_backend_t backend) { return backend->interface.get_alignment(backend); }
static inline void ggml_backend_tensor_set_async(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { get_backend(tensor)->interface.set_tensor_async(get_backend(tensor), tensor, data, offset, size); }
static inline void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { get_backend(tensor)->interface.get_tensor_async(get_backend(tensor), tensor, data, offset, size); }
static inline void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { get_backend(tensor)->interface.set_tensor_async(get_backend(tensor), tensor, data, offset, size); get_backend(tensor)->interface.synchronize(get_backend(tensor)); }
static inline void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { get_backend(tensor)->interface.get_tensor_async(get_backend(tensor), tensor, data, offset, size); get_backend(tensor)->interface.synchronize(get_backend(tensor)); }
static inline void ggml_backend_synchronize(ggml_backend_t backend) { backend->interface.synchronize(backend); }
static inline ggml_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) { return backend->interface.graph_plan_create(backend, cgraph); }
static inline void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_graph_plan_t plan) { backend->interface.graph_plan_free(backend, plan); }
static inline void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_graph_plan_t plan) { backend->interface.graph_plan_compute(backend, plan); }
static inline void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { backend->interface.graph_compute(backend, cgraph); }
static inline bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { return backend->interface.supports_op(backend, op); }
GGML_API struct ggml_backend * ggml_get_backend(const struct ggml_tensor * tensor);

GGML_API const char * ggml_backend_name(struct ggml_backend * backend);
GGML_API void ggml_backend_free(struct ggml_backend * backend);

GGML_API struct ggml_backend_buffer * ggml_backend_alloc_buffer(struct ggml_backend * backend, size_t size);

GGML_API size_t ggml_backend_get_alignment(struct ggml_backend * backend);

GGML_API void ggml_backend_tensor_set_async( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);

GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);

GGML_API void ggml_backend_synchronize(struct ggml_backend * backend);

GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (struct ggml_backend * backend, struct ggml_cgraph * cgraph);

GGML_API void ggml_backend_graph_plan_free (struct ggml_backend * backend, ggml_backend_graph_plan_t plan);
GGML_API void ggml_backend_graph_plan_compute(struct ggml_backend * backend, ggml_backend_graph_plan_t plan);
GGML_API void ggml_backend_graph_compute (struct ggml_backend * backend, struct ggml_cgraph * cgraph);
GGML_API bool ggml_backend_supports_op (struct ggml_backend * backend, const struct ggml_tensor * op);

// tensor copy between different backends
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);

//
// CPU backend
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
//

GGML_API struct ggml_backend * ggml_backend_cpu_init(void);

GGML_API void ggml_backend_cpu_set_n_threads(struct ggml_backend * backend_cpu, int n_threads);

GGML_API struct ggml_backend_buffer * ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);

///////////////////////////

Expand Down
7 changes: 4 additions & 3 deletions include/ggml/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ extern "C" {
GGML_TYPE_COUNT,
};

enum ggml_backend {
enum ggml_backend_type {
GGML_BACKEND_CPU = 0,
GGML_BACKEND_GPU = 10,
GGML_BACKEND_GPU_SPLIT = 20,
Expand Down Expand Up @@ -479,8 +479,9 @@ extern "C" {

// n-dimensional tensor
struct ggml_tensor {
enum ggml_type type;
enum ggml_backend backend;
enum ggml_type type;
enum ggml_backend_type backend;

struct ggml_backend_buffer * buffer;

int n_dims;
Expand Down
4 changes: 2 additions & 2 deletions src/ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ struct free_block {
#define MAX_FREE_BLOCKS 256

struct ggml_allocr {
ggml_backend_buffer_t buffer;
struct ggml_backend_buffer * buffer;
bool buffer_owned;
void * data;
size_t alignment;
Expand Down Expand Up @@ -265,7 +265,7 @@ void ggml_allocr_reset(struct ggml_allocr * alloc) {
alloc->n_free_blocks = 1;
size_t align_offset = aligned_offset(alloc->data, 0, alloc->alignment);
alloc->free_blocks[0].addr = (char *)alloc->data + align_offset;
alloc->free_blocks[0].size = alloc->buffer->size - align_offset;
alloc->free_blocks[0].size = ggml_backend_buffer_get_size(alloc->buffer) - align_offset;
}

struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment) {
Expand Down
Loading