Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/common/build-info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#include <cstdio>
#include <string>

int LLAMA_BUILD_NUMBER = 9204;
char const * LLAMA_COMMIT = "726704a";
int LLAMA_BUILD_NUMBER = 9222;
char const * LLAMA_COMMIT = "9a532ae";
char const * LLAMA_COMPILER = "unknown";
char const * LLAMA_BUILD_TARGET = "unknown";

Expand Down
38 changes: 19 additions & 19 deletions cpp/ggml-ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,45 +12,45 @@
// Meta backend
//

#define LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_META_MAX_DEVICES 16
#define LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_META_MAX_DEVICES 16

enum lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_axis {
enum lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_axis {
// tensor split by tensor dimensions:
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_0 = 0,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_1 = 1,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_2 = 2,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_3 = 3,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_0 = 0,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_1 = 1,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_2 = 2,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_3 = 3,

LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_MIRRORED = 10, // all values on all backends
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_PARTIAL = 11, // each backend has a partial sum
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_MIRRORED = 10, // all values on all backends
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_PARTIAL = 11, // each backend has a partial sum

// for internal bookkeeping only:
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_NONE = 98,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_UNKNOWN = 99,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_NONE = 98,
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_SPLIT_AXIS_UNKNOWN = 99,
};
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_API const char * lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_axis_name(enum lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_axis split_axis);
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_API const char * lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_axis_name(enum lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_axis split_axis);

struct lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_state {
enum lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_axis axis;
struct lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_state {
enum lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_axis axis;

// for tensors with axis >= 0 && axis < LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_MAX_DIMS:
// for tensors with axis >= 0 && axis < LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_MAX_DIMS:
// - each device has a slice of the tensor along the split axis
// - most tensors have n_segments == 1 and a contiguous slice of the tensor data
// - some tensors have an inhomogeneous data layout along the split axis,
// those tensors are divided into segments which are each individually split across devices
// - ne has one entry per segment and device that add up to lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_tensor::ne for that axis,
// - ne has one entry per segment and device that add up to lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_tensor::ne for that axis,
// the outer/inner loops are over segments/devices like [seg0_dev0, seg0_dev1, seg1_dev0, seg1_dev1],
// - for example, a transformer may have a fused QKV matrix rather than 3 matrices, those would be 3 separate segments
// that each need to be split individually across devices so that each device gets a slice of Q, K, and V
int64_t ne[16*LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_META_MAX_DEVICES];
int64_t ne[16*LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_BACKEND_META_MAX_DEVICES];
uint32_t n_segments;
};

// function to assign split states for statically allocated tensors, compute tensor split states will be assigned to be compatible:
typedef struct lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_state(*lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_get_split_state_t)(const struct lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_tensor * tensor, void * userdata);
typedef struct lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_split_state(*lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_get_split_state_t)(const struct lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_tensor * tensor, void * userdata);

// create a new meta device from "simple" devices, meta buffer type/buffer/backend is then derived from this:
// TODO: this looks a bit strange - a backend API creates a device. I think we should try to
// express this as a backend registry functionality instead
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_API lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_dev_t lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_device(
lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_dev_t * devs, size_t n_devs, lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_get_split_state_t get_split_state, void * get_split_state_ud);
LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_LM_GGML_API lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_dev_t lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_device(
lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_dev_t * devs, size_t n_devs, lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_backend_meta_get_split_state_t get_split_state, void * get_split_state_ud);
38 changes: 38 additions & 0 deletions cpp/ggml-hexagon/ggml-hexagon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2744,6 +2744,18 @@ static bool lm_ggml_hexagon_supported_ssm_conv(const struct lm_ggml_hexagon_sess
return true;
}

// Decides whether a PAD op can be accepted by the Hexagon backend.
//
// sess: session handle; not consulted by this check.
// op:   the PAD node; op->src[0] is the input and op itself is the output.
//
// Returns true only when both the input and the output tensor are F32.
static bool lm_ggml_hexagon_supported_pad(const struct lm_ggml_hexagon_session * sess, const struct lm_ggml_tensor * op) {
    LM_GGML_UNUSED(sess);

    const struct lm_ggml_tensor * input  = op->src[0];
    const struct lm_ggml_tensor * output = op;

    // Input type is tested first, output type only if the input is F32.
    const bool both_f32 = input->type == LM_GGML_TYPE_F32 && output->type == LM_GGML_TYPE_F32;

    return both_f32;
}

static bool lm_ggml_hexagon_supported_cumsum(const struct lm_ggml_hexagon_session * sess, const struct lm_ggml_tensor * op) {
const struct lm_ggml_tensor * src0 = op->src[0];
const struct lm_ggml_tensor * dst = op;
Expand Down Expand Up @@ -2816,6 +2828,21 @@ static bool lm_ggml_hexagon_supported_solve_tri(const struct lm_ggml_hexagon_ses
return true;
}

// Decides whether a TRI op can be accepted by the Hexagon backend.
//
// sess: session handle; not consulted by this check.
// op:   the TRI node; op->src[0] is the input and op itself is the output.
//
// Returns true only when all of the following hold:
//   - input and output are both F32,
//   - lm_ggml_are_same_shape(input, output) is true,
//   - lm_ggml_is_contiguous() is true for both input and output.
static bool lm_ggml_hexagon_supported_tri(const struct lm_ggml_hexagon_session * sess, const struct lm_ggml_tensor * op) {
    // Mark the unused parameter before any return so the statement is reachable
    // (it used to sit after `return true;`), matching lm_ggml_hexagon_supported_pad.
    LM_GGML_UNUSED(sess);

    const struct lm_ggml_tensor * src0 = op->src[0];
    const struct lm_ggml_tensor * dst  = op;

    if (src0->type != LM_GGML_TYPE_F32) { return false; }
    if (dst->type  != LM_GGML_TYPE_F32) { return false; }
    if (!lm_ggml_are_same_shape(src0, dst)) { return false; }
    if (!lm_ggml_is_contiguous(src0) || !lm_ggml_is_contiguous(dst)) { return false; }

    return true;
}

static const char * lm_ggml_backend_hexagon_name(lm_ggml_backend_t backend) {
auto sess = static_cast<lm_ggml_hexagon_session *>(backend->context);
return sess->c_name();
Expand Down Expand Up @@ -2857,6 +2884,9 @@ static htp_op_code op_remap_to_htp(const lm_ggml_tensor * t) {
case LM_GGML_OP_FILL: return HTP_OP_FILL;
case LM_GGML_OP_DIAG: return HTP_OP_DIAG;
case LM_GGML_OP_SOLVE_TRI: return HTP_OP_SOLVE_TRI;
case LM_GGML_OP_TRI: return HTP_OP_TRI;
case LM_GGML_OP_PAD: return HTP_OP_PAD;

case LM_GGML_OP_UNARY:
switch (lm_ggml_get_unary_op(t)) {
case LM_GGML_UNARY_OP_SILU: return HTP_OP_UNARY_SILU;
Expand Down Expand Up @@ -3416,6 +3446,14 @@ static bool lm_ggml_backend_hexagon_device_supports_op(lm_ggml_backend_dev_t dev
supp = lm_ggml_hexagon_supported_solve_tri(sess, op);
break;

case LM_GGML_OP_TRI:
supp = lm_ggml_hexagon_supported_tri(sess, op);
break;

case LM_GGML_OP_PAD:
supp = lm_ggml_hexagon_supported_pad(sess, op);
break;

default:
break;
}
Expand Down
1 change: 1 addition & 0 deletions cpp/ggml-hexagon/htp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ add_library(${HTP_LIB} SHARED
diag-ops.c
solve-tri-ops.c
gated-delta-net-ops.c
pad-ops.c
)

target_compile_definitions(${HTP_LIB} PRIVATE
Expand Down
2 changes: 2 additions & 0 deletions cpp/ggml-hexagon/htp/htp-ctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,5 +107,7 @@ int op_fill(struct htp_ops_context * octx);
int op_diag(struct htp_ops_context * octx);
int op_solve_tri(struct htp_ops_context * octx);
int op_gated_delta_net(struct htp_ops_context * octx);
int op_tri(struct htp_ops_context * octx);
int op_pad(struct htp_ops_context * octx);

#endif /* HTP_CTX_H */
4 changes: 2 additions & 2 deletions cpp/ggml-hexagon/htp/htp-msg.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ enum htp_status {
HTP_STATUS_VTCM_TOO_SMALL = 5,
};

// The values must match the lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_type.
// The values must match the lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_type.
// Duplicated here because we can't include full ggml.h in the htp build.
// We have some static_asserts in the cpp code to ensure things are in sync.
enum htp_data_type {
Expand Down Expand Up @@ -130,7 +130,7 @@ struct htp_tensor {
uint32_t data; // Buffer offset in the messages, and data pointer on the NSP
uint32_t type; // Data type
uint32_t ne[HTP_MAX_DIMS]; // Number of elements
uint32_t nb[HTP_MAX_DIMS]; // Stride in bytes (see ggml.h lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_tensor)
uint32_t nb[HTP_MAX_DIMS]; // Stride in bytes (see ggml.h lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_lm_ggml_tensor)
};

#define HTP_MAX_OP_PARAMS 64
Expand Down
2 changes: 2 additions & 0 deletions cpp/ggml-hexagon/htp/htp-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ enum htp_op_code {
HTP_OP_SOLVE_TRI,
HTP_OP_L2_NORM,
HTP_OP_GATED_DELTA_NET,
HTP_OP_TRI,
HTP_OP_PAD,

HTP_OP_INVALID
};
Expand Down
6 changes: 6 additions & 0 deletions cpp/ggml-hexagon/htp/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,9 +595,15 @@ static int execute_op(struct htp_ops_context * octx) {
case HTP_OP_SOLVE_TRI:
return op_solve_tri(octx);

case HTP_OP_PAD:
return op_pad(octx);

case HTP_OP_GATED_DELTA_NET:
return op_gated_delta_net(octx);

case HTP_OP_TRI:
return op_tri(octx);

case HTP_OP_INVALID:
break;

Expand Down
Loading